From de8a76da2f374792594ce03a203b3f30e4889f6f Mon Sep 17 00:00:00 2001 From: Sebastian Huber Date: Tue, 4 Apr 2017 09:36:57 +0200 Subject: Update to FreeBSD head 2017-04-04 Git mirror commit 642b174daddbd0efd9bb5f242c43f4ab4db6869f. --- freebsd/sys/arm/include/machine/cpufunc.h | 4 +- freebsd/sys/bsm/audit.h | 18 + freebsd/sys/bsm/audit_kevents.h | 33 +- freebsd/sys/cam/cam_ccb.h | 7 + freebsd/sys/cam/cam_periph.h | 26 +- freebsd/sys/cam/cam_xpt.h | 6 + freebsd/sys/cam/scsi/scsi_all.c | 24 +- freebsd/sys/cam/scsi/scsi_all.h | 2 +- freebsd/sys/crypto/intake.h | 64 + freebsd/sys/dev/bce/if_bce.c | 2 +- freebsd/sys/dev/e1000/e1000_82575.c | 1 - freebsd/sys/dev/e1000/e1000_82575.h | 1 + freebsd/sys/dev/e1000/e1000_defines.h | 2 + freebsd/sys/dev/e1000/e1000_ich8lan.c | 82 +- freebsd/sys/dev/e1000/em_txrx.c | 817 +++ freebsd/sys/dev/e1000/if_em.c | 5940 ++++++---------- freebsd/sys/dev/e1000/if_em.h | 342 +- freebsd/sys/dev/e1000/if_igb.c | 6452 ------------------ freebsd/sys/dev/e1000/if_igb.h | 634 -- freebsd/sys/dev/e1000/if_lem.c | 4732 ------------- freebsd/sys/dev/e1000/if_lem.h | 519 -- freebsd/sys/dev/e1000/igb_txrx.c | 586 ++ freebsd/sys/dev/fdt/fdt_common.h | 6 - freebsd/sys/dev/fdt/simplebus.c | 2 +- freebsd/sys/dev/mmc/bridge.h | 63 +- freebsd/sys/dev/mmc/mmc.c | 837 ++- freebsd/sys/dev/mmc/mmc_ioctl.h | 64 + freebsd/sys/dev/mmc/mmc_private.h | 69 + freebsd/sys/dev/mmc/mmc_subr.c | 254 + freebsd/sys/dev/mmc/mmc_subr.h | 72 + freebsd/sys/dev/mmc/mmcbrvar.h | 25 +- freebsd/sys/dev/mmc/mmcreg.h | 196 +- freebsd/sys/dev/mmc/mmcsd.c | 1134 ++- freebsd/sys/dev/mmc/mmcvar.h | 6 +- freebsd/sys/dev/nvme/nvme.h | 3 +- freebsd/sys/dev/ofw/ofw_bus_subr.c | 21 +- freebsd/sys/dev/ofw/ofw_bus_subr.h | 1 + freebsd/sys/dev/ofw/ofw_fdt.c | 6 + freebsd/sys/dev/pci/pci.c | 9 +- freebsd/sys/dev/pci/pci_pci.c | 106 +- freebsd/sys/dev/pci/pci_private.h | 2 +- freebsd/sys/dev/pci/pcib_private.h | 2 +- freebsd/sys/dev/pci/pcireg.h | 21 + freebsd/sys/dev/rtwn/if_rtwn.c | 57 +- 
freebsd/sys/dev/rtwn/if_rtwn_rx.c | 153 +- freebsd/sys/dev/rtwn/if_rtwn_rx.h | 2 +- freebsd/sys/dev/rtwn/if_rtwn_tx.c | 18 +- freebsd/sys/dev/rtwn/if_rtwnvar.h | 23 +- freebsd/sys/dev/rtwn/pci/rtwn_pci_attach.c | 34 +- freebsd/sys/dev/rtwn/pci/rtwn_pci_rx.c | 23 +- freebsd/sys/dev/rtwn/rtl8188e/r88e.h | 2 + freebsd/sys/dev/rtwn/rtl8188e/r88e_fw.c | 2 +- freebsd/sys/dev/rtwn/rtl8188e/r88e_rx.c | 17 + freebsd/sys/dev/rtwn/rtl8188e/usb/r88eu_attach.c | 2 + freebsd/sys/dev/rtwn/rtl8192c/pci/r92ce_attach.c | 2 + freebsd/sys/dev/rtwn/rtl8192c/r92c.h | 6 + freebsd/sys/dev/rtwn/rtl8192c/r92c_fw.c | 2 +- freebsd/sys/dev/rtwn/rtl8192c/r92c_init.c | 26 + freebsd/sys/dev/rtwn/rtl8192c/r92c_reg.h | 26 +- freebsd/sys/dev/rtwn/rtl8192c/r92c_rx.c | 44 + freebsd/sys/dev/rtwn/rtl8192c/r92c_rx_desc.h | 5 + freebsd/sys/dev/rtwn/rtl8192c/r92c_tx_desc.h | 2 +- freebsd/sys/dev/rtwn/rtl8192c/usb/r92cu_attach.c | 2 + freebsd/sys/dev/rtwn/rtl8812a/r12a.h | 2 + freebsd/sys/dev/rtwn/rtl8812a/r12a_beacon.c | 2 + freebsd/sys/dev/rtwn/rtl8812a/r12a_fw.c | 4 +- freebsd/sys/dev/rtwn/rtl8812a/r12a_rx.c | 91 +- freebsd/sys/dev/rtwn/rtl8812a/r12a_rx_desc.h | 8 + freebsd/sys/dev/rtwn/rtl8812a/r12a_tx.c | 12 + freebsd/sys/dev/rtwn/rtl8812a/usb/r12au_attach.c | 12 +- freebsd/sys/dev/rtwn/rtl8821a/r21a_init.c | 2 +- freebsd/sys/dev/rtwn/rtl8821a/usb/r21au_attach.c | 5 +- freebsd/sys/dev/rtwn/usb/rtwn_usb_attach.h | 13 +- freebsd/sys/dev/rtwn/usb/rtwn_usb_ep.c | 8 +- freebsd/sys/dev/rtwn/usb/rtwn_usb_rx.c | 26 +- freebsd/sys/dev/tsec/if_tsec.c | 311 +- freebsd/sys/dev/tsec/if_tsec.h | 118 +- freebsd/sys/dev/usb/quirk/usb_quirk.c | 2 + freebsd/sys/dev/usb/quirk/usb_quirk.h | 1 + freebsd/sys/dev/usb/usb_hub.c | 4 +- freebsd/sys/dev/usb/usb_pf.h | 2 +- freebsd/sys/dev/usb/wlan/if_rsu.c | 6 + freebsd/sys/dev/usb/wlan/if_rum.c | 19 +- freebsd/sys/dev/usb/wlan/if_run.c | 6 +- freebsd/sys/dev/usb/wlan/if_ural.c | 12 +- freebsd/sys/dev/usb/wlan/if_urtw.c | 7 +- freebsd/sys/dev/usb/wlan/if_zyd.c | 9 +- 
freebsd/sys/i386/i386/in_cksum.c | 2 +- freebsd/sys/i386/include/machine/cpufunc.h | 41 +- freebsd/sys/isa/isavar.h | 13 - freebsd/sys/kern/init_main.c | 2 +- freebsd/sys/kern/kern_condvar.c | 10 +- freebsd/sys/kern/kern_event.c | 29 +- freebsd/sys/kern/kern_linker.c | 3 +- freebsd/sys/kern/kern_mib.c | 5 + freebsd/sys/kern/kern_synch.c | 39 +- freebsd/sys/kern/kern_sysctl.c | 3 +- freebsd/sys/kern/kern_time.c | 145 +- freebsd/sys/kern/kern_timeout.c | 8 +- freebsd/sys/kern/subr_bus.c | 6 +- freebsd/sys/kern/subr_lock.c | 64 +- freebsd/sys/kern/subr_prf.c | 54 +- freebsd/sys/kern/subr_sleepqueue.c | 199 +- freebsd/sys/kern/subr_taskqueue.c | 17 + freebsd/sys/kern/subr_uio.c | 9 +- freebsd/sys/kern/subr_unit.c | 17 +- freebsd/sys/kern/sys_generic.c | 73 +- freebsd/sys/kern/uipc_mbuf.c | 4 +- freebsd/sys/kern/uipc_mbuf2.c | 2 +- freebsd/sys/kern/uipc_sockbuf.c | 5 + freebsd/sys/kern/uipc_socket.c | 77 +- freebsd/sys/kern/uipc_syscalls.c | 46 +- freebsd/sys/kern/uipc_usrreq.c | 28 + freebsd/sys/libkern/bcd.c | 4 + freebsd/sys/libkern/inet_ntoa.c | 14 - freebsd/sys/libkern/random.c | 4 +- freebsd/sys/net/bpf.c | 6 +- freebsd/sys/net/bpf.h | 1054 +-- freebsd/sys/net/bpf_buffer.c | 2 +- freebsd/sys/net/bpf_filter.c | 2 +- freebsd/sys/net/bpfdesc.h | 2 +- freebsd/sys/net/dlt.h | 1338 ++++ freebsd/sys/net/ieee8023ad_lacp.c | 56 +- freebsd/sys/net/ieee8023ad_lacp.h | 3 + freebsd/sys/net/if.c | 65 +- freebsd/sys/net/if.h | 3 +- freebsd/sys/net/if_arc.h | 2 +- freebsd/sys/net/if_arp.h | 2 +- freebsd/sys/net/if_bridge.c | 6 + freebsd/sys/net/if_bridgevar.h | 1 + freebsd/sys/net/if_clone.c | 2 +- freebsd/sys/net/if_clone.h | 2 +- freebsd/sys/net/if_dead.c | 28 + freebsd/sys/net/if_disc.c | 2 +- freebsd/sys/net/if_dl.h | 2 +- freebsd/sys/net/if_edsc.c | 2 +- freebsd/sys/net/if_enc.c | 1 + freebsd/sys/net/if_epair.c | 6 +- freebsd/sys/net/if_ethersubr.c | 2 +- freebsd/sys/net/if_fddisubr.c | 2 +- freebsd/sys/net/if_fwsubr.c | 2 +- freebsd/sys/net/if_iso88025subr.c | 2 +- 
freebsd/sys/net/if_lagg.c | 74 +- freebsd/sys/net/if_lagg.h | 2 + freebsd/sys/net/if_llc.h | 2 +- freebsd/sys/net/if_loop.c | 2 +- freebsd/sys/net/if_media.c | 1 + freebsd/sys/net/if_stf.c | 24 +- freebsd/sys/net/if_types.h | 4 +- freebsd/sys/net/if_var.h | 67 +- freebsd/sys/net/if_vlan.c | 33 + freebsd/sys/net/iflib.h | 393 ++ freebsd/sys/net/ifq.h | 2 +- freebsd/sys/net/netisr.c | 5 +- freebsd/sys/net/pfil.c | 27 +- freebsd/sys/net/pfil.h | 33 +- freebsd/sys/net/pfkeyv2.h | 9 +- freebsd/sys/net/pfvar.h | 2 + freebsd/sys/net/radix.c | 2 +- freebsd/sys/net/radix.h | 2 +- freebsd/sys/net/raw_cb.c | 2 +- freebsd/sys/net/raw_cb.h | 2 +- freebsd/sys/net/raw_usrreq.c | 2 +- freebsd/sys/net/route.c | 2 +- freebsd/sys/net/route.h | 6 +- freebsd/sys/net/route_var.h | 2 +- freebsd/sys/net/rtsock.c | 2 +- freebsd/sys/net/slcompress.c | 2 +- freebsd/sys/net/slcompress.h | 2 +- freebsd/sys/net80211/_ieee80211.h | 124 +- freebsd/sys/net80211/ieee80211.c | 327 +- freebsd/sys/net80211/ieee80211.h | 48 +- freebsd/sys/net80211/ieee80211_adhoc.c | 6 +- freebsd/sys/net80211/ieee80211_freebsd.c | 26 + freebsd/sys/net80211/ieee80211_freebsd.h | 93 +- freebsd/sys/net80211/ieee80211_hostap.c | 41 + freebsd/sys/net80211/ieee80211_ht.c | 409 +- freebsd/sys/net80211/ieee80211_ht.h | 8 +- freebsd/sys/net80211/ieee80211_input.c | 26 + freebsd/sys/net80211/ieee80211_input.h | 13 + freebsd/sys/net80211/ieee80211_ioctl.c | 50 +- freebsd/sys/net80211/ieee80211_node.c | 169 +- freebsd/sys/net80211/ieee80211_node.h | 6 + freebsd/sys/net80211/ieee80211_output.c | 300 +- freebsd/sys/net80211/ieee80211_proto.c | 44 +- freebsd/sys/net80211/ieee80211_proto.h | 2 + freebsd/sys/net80211/ieee80211_scan_sta.c | 46 +- freebsd/sys/net80211/ieee80211_sta.c | 88 +- freebsd/sys/net80211/ieee80211_superg.c | 22 +- freebsd/sys/net80211/ieee80211_superg.h | 2 + freebsd/sys/net80211/ieee80211_tdma.c | 2 + freebsd/sys/net80211/ieee80211_var.h | 37 +- freebsd/sys/net80211/ieee80211_vht.c | 855 +++ 
freebsd/sys/net80211/ieee80211_vht.h | 68 + freebsd/sys/netinet/cc/cc.h | 2 +- freebsd/sys/netinet/icmp6.h | 2 +- freebsd/sys/netinet/icmp_var.h | 2 +- freebsd/sys/netinet/if_ether.c | 30 +- freebsd/sys/netinet/if_ether.h | 2 +- freebsd/sys/netinet/igmp.c | 111 +- freebsd/sys/netinet/igmp.h | 2 +- freebsd/sys/netinet/igmp_var.h | 2 +- freebsd/sys/netinet/in.c | 28 +- freebsd/sys/netinet/in.h | 5 +- freebsd/sys/netinet/in_fib.c | 2 +- freebsd/sys/netinet/in_fib.h | 2 +- freebsd/sys/netinet/in_mcast.c | 89 +- freebsd/sys/netinet/in_pcb.c | 319 +- freebsd/sys/netinet/in_pcb.h | 132 +- freebsd/sys/netinet/in_proto.c | 40 +- freebsd/sys/netinet/in_systm.h | 2 +- freebsd/sys/netinet/in_var.h | 2 +- freebsd/sys/netinet/ip.h | 2 +- freebsd/sys/netinet/ip6.h | 2 +- freebsd/sys/netinet/ip_carp.c | 7 +- freebsd/sys/netinet/ip_carp.h | 4 +- freebsd/sys/netinet/ip_divert.c | 10 +- freebsd/sys/netinet/ip_fw.h | 1 + freebsd/sys/netinet/ip_icmp.c | 25 +- freebsd/sys/netinet/ip_icmp.h | 2 +- freebsd/sys/netinet/ip_input.c | 151 +- freebsd/sys/netinet/ip_ipsec.c | 409 -- freebsd/sys/netinet/ip_ipsec.h | 40 - freebsd/sys/netinet/ip_mroute.c | 41 +- freebsd/sys/netinet/ip_mroute.h | 2 +- freebsd/sys/netinet/ip_options.c | 13 +- freebsd/sys/netinet/ip_options.h | 2 +- freebsd/sys/netinet/ip_output.c | 122 +- freebsd/sys/netinet/ip_reass.c | 2 +- freebsd/sys/netinet/ip_var.h | 2 +- freebsd/sys/netinet/libalias/alias_local.h | 6 + freebsd/sys/netinet/libalias/alias_nbt.c | 32 +- freebsd/sys/netinet/libalias/alias_proxy.c | 7 +- freebsd/sys/netinet/libalias/alias_sctp.c | 14 +- freebsd/sys/netinet/raw_ip.c | 22 +- freebsd/sys/netinet/sctp_input.c | 28 - freebsd/sys/netinet/sctp_os_bsd.h | 10 - freebsd/sys/netinet/sctp_output.c | 79 +- freebsd/sys/netinet/sctp_pcb.c | 18 - freebsd/sys/netinet/sctp_timer.c | 8 +- freebsd/sys/netinet/sctp_usrreq.c | 2 +- freebsd/sys/netinet/tcp.h | 2 +- freebsd/sys/netinet/tcp_debug.c | 2 +- freebsd/sys/netinet/tcp_debug.h | 2 +- 
freebsd/sys/netinet/tcp_fsm.h | 2 +- freebsd/sys/netinet/tcp_hostcache.c | 4 +- freebsd/sys/netinet/tcp_input.c | 120 +- freebsd/sys/netinet/tcp_output.c | 86 +- freebsd/sys/netinet/tcp_reass.c | 2 +- freebsd/sys/netinet/tcp_seq.h | 2 +- freebsd/sys/netinet/tcp_subr.c | 455 +- freebsd/sys/netinet/tcp_syncache.c | 173 +- freebsd/sys/netinet/tcp_syncache.h | 2 +- freebsd/sys/netinet/tcp_timer.c | 37 +- freebsd/sys/netinet/tcp_timer.h | 11 +- freebsd/sys/netinet/tcp_timewait.c | 2 +- freebsd/sys/netinet/tcp_usrreq.c | 32 +- freebsd/sys/netinet/tcp_var.h | 277 +- freebsd/sys/netinet/tcpip.h | 2 +- freebsd/sys/netinet/udp.h | 4 +- freebsd/sys/netinet/udp_usrreq.c | 285 +- freebsd/sys/netinet/udp_var.h | 2 +- freebsd/sys/netinet6/frag6.c | 9 + freebsd/sys/netinet6/icmp6.c | 6 +- freebsd/sys/netinet6/in6.c | 24 +- freebsd/sys/netinet6/in6.h | 8 +- freebsd/sys/netinet6/in6_cksum.c | 2 +- freebsd/sys/netinet6/in6_fib.c | 2 +- freebsd/sys/netinet6/in6_fib.h | 2 +- freebsd/sys/netinet6/in6_ifattach.c | 1 - freebsd/sys/netinet6/in6_pcb.c | 6 +- freebsd/sys/netinet6/in6_pcb.h | 4 +- freebsd/sys/netinet6/in6_proto.c | 56 +- freebsd/sys/netinet6/in6_src.c | 6 +- freebsd/sys/netinet6/in6_var.h | 2 +- freebsd/sys/netinet6/ip6_forward.c | 229 +- freebsd/sys/netinet6/ip6_input.c | 97 +- freebsd/sys/netinet6/ip6_ipsec.c | 393 -- freebsd/sys/netinet6/ip6_ipsec.h | 42 - freebsd/sys/netinet6/ip6_mroute.c | 2 +- freebsd/sys/netinet6/ip6_output.c | 145 +- freebsd/sys/netinet6/ip6_var.h | 2 +- freebsd/sys/netinet6/ip6protosw.h | 2 +- freebsd/sys/netinet6/mld6.c | 2 +- freebsd/sys/netinet6/nd6.c | 74 +- freebsd/sys/netinet6/nd6.h | 1 + freebsd/sys/netinet6/nd6_nbr.c | 9 +- freebsd/sys/netinet6/nd6_rtr.c | 108 +- freebsd/sys/netinet6/raw_ip6.c | 28 +- freebsd/sys/netinet6/sctp6_usrreq.c | 9 - freebsd/sys/netinet6/tcp6_var.h | 2 +- freebsd/sys/netinet6/udp6_usrreq.c | 51 +- freebsd/sys/netinet6/udp6_var.h | 2 +- freebsd/sys/netipsec/ah_var.h | 48 +- freebsd/sys/netipsec/esp.h | 6 +- 
freebsd/sys/netipsec/esp_var.h | 50 +- freebsd/sys/netipsec/ipcomp_var.h | 47 +- freebsd/sys/netipsec/ipip_var.h | 70 - freebsd/sys/netipsec/ipsec.c | 1826 ++--- freebsd/sys/netipsec/ipsec.h | 351 +- freebsd/sys/netipsec/ipsec6.h | 37 +- freebsd/sys/netipsec/ipsec_input.c | 741 +- freebsd/sys/netipsec/ipsec_mbuf.c | 34 +- freebsd/sys/netipsec/ipsec_mod.c | 150 + freebsd/sys/netipsec/ipsec_output.c | 1451 ++-- freebsd/sys/netipsec/ipsec_pcb.c | 481 ++ freebsd/sys/netipsec/ipsec_support.h | 190 + freebsd/sys/netipsec/key.c | 7195 ++++++++++---------- freebsd/sys/netipsec/key.h | 93 +- freebsd/sys/netipsec/key_debug.c | 488 +- freebsd/sys/netipsec/key_debug.h | 41 +- freebsd/sys/netipsec/key_var.h | 17 - freebsd/sys/netipsec/keydb.h | 187 +- freebsd/sys/netipsec/keysock.c | 70 +- freebsd/sys/netipsec/keysock.h | 54 +- freebsd/sys/netipsec/subr_ipsec.c | 356 + freebsd/sys/netipsec/udpencap.c | 299 + freebsd/sys/netipsec/xform.h | 82 +- freebsd/sys/netipsec/xform_ah.c | 523 +- freebsd/sys/netipsec/xform_esp.c | 696 +- freebsd/sys/netipsec/xform_ipcomp.c | 426 +- freebsd/sys/netipsec/xform_ipip.c | 728 -- freebsd/sys/netipsec/xform_tcp.c | 354 +- freebsd/sys/netpfil/ipfw/dn_aqm_pie.h | 6 +- freebsd/sys/netpfil/ipfw/dn_heap.h | 2 +- freebsd/sys/netpfil/ipfw/ip_fw2.c | 199 +- freebsd/sys/netpfil/ipfw/ip_fw_dynamic.c | 1 + freebsd/sys/netpfil/ipfw/ip_fw_eaction.c | 23 +- freebsd/sys/netpfil/ipfw/ip_fw_iface.c | 1 + freebsd/sys/netpfil/ipfw/ip_fw_log.c | 8 +- freebsd/sys/netpfil/ipfw/ip_fw_nat.c | 1 + freebsd/sys/netpfil/ipfw/ip_fw_private.h | 26 +- freebsd/sys/netpfil/ipfw/ip_fw_sockopt.c | 27 +- freebsd/sys/netpfil/ipfw/ip_fw_table.c | 22 +- freebsd/sys/netpfil/ipfw/ip_fw_table_value.c | 1 + freebsd/sys/netpfil/ipfw/nat64/nat64lsn.c | 2 +- freebsd/sys/netpfil/ipfw/nat64/nat64stl.c | 12 +- freebsd/sys/netpfil/ipfw/nptv6/nptv6.c | 30 +- freebsd/sys/netpfil/pf/pf.c | 63 +- freebsd/sys/netpfil/pf/pf_ioctl.c | 14 +- freebsd/sys/netpfil/pf/pf_lb.c | 2 +- 
freebsd/sys/netpfil/pf/pf_osfp.c | 15 +- freebsd/sys/opencrypto/crypto.c | 38 +- freebsd/sys/powerpc/include/machine/cpufunc.h | 2 +- freebsd/sys/powerpc/include/machine/intr_machdep.h | 64 + freebsd/sys/powerpc/include/machine/psl.h | 8 + freebsd/sys/powerpc/include/machine/spr.h | 15 + freebsd/sys/security/audit/audit.h | 57 + freebsd/sys/sparc64/sparc64/in_cksum.c | 2 +- freebsd/sys/sys/_callout.h | 2 +- freebsd/sys/sys/_sockaddr_storage.h | 2 +- freebsd/sys/sys/ata.h | 2 +- freebsd/sys/sys/bitstring.h | 2 +- freebsd/sys/sys/buf.h | 2 +- freebsd/sys/sys/buf_ring.h | 6 +- freebsd/sys/sys/bufobj.h | 6 + freebsd/sys/sys/bus.h | 13 +- freebsd/sys/sys/callout.h | 2 +- freebsd/sys/sys/capability.h | 1 + freebsd/sys/sys/conf.h | 2 +- freebsd/sys/sys/ctype.h | 2 +- freebsd/sys/sys/domain.h | 2 +- freebsd/sys/sys/eventvar.h | 2 +- freebsd/sys/sys/file.h | 10 +- freebsd/sys/sys/filedesc.h | 2 +- freebsd/sys/sys/gtaskqueue.h | 124 + freebsd/sys/sys/kernel.h | 2 +- freebsd/sys/sys/libkern.h | 40 +- freebsd/sys/sys/limits.h | 2 +- freebsd/sys/sys/linker.h | 5 +- freebsd/sys/sys/lockmgr.h | 4 + freebsd/sys/sys/lockstat.h | 15 +- freebsd/sys/sys/malloc.h | 2 +- freebsd/sys/sys/mbuf.h | 28 +- freebsd/sys/sys/mount.h | 2 +- freebsd/sys/sys/mutex.h | 56 +- freebsd/sys/sys/nlist_aout.h | 2 +- freebsd/sys/sys/nv.h | 246 + freebsd/sys/sys/pcpu.h | 2 +- freebsd/sys/sys/pipe.h | 2 +- freebsd/sys/sys/proc.h | 29 +- freebsd/sys/sys/protosw.h | 2 +- freebsd/sys/sys/reboot.h | 2 +- freebsd/sys/sys/resourcevar.h | 2 +- freebsd/sys/sys/rwlock.h | 38 +- freebsd/sys/sys/sbuf.h | 1 + freebsd/sys/sys/sdt.h | 4 + freebsd/sys/sys/selinfo.h | 2 +- freebsd/sys/sys/sigio.h | 2 +- freebsd/sys/sys/signalvar.h | 10 +- freebsd/sys/sys/sleepqueue.h | 3 + freebsd/sys/sys/slicer.h | 24 +- freebsd/sys/sys/sockbuf.h | 2 +- freebsd/sys/sys/socket.h | 57 +- freebsd/sys/sys/socketvar.h | 9 +- freebsd/sys/sys/sockopt.h | 4 +- freebsd/sys/sys/sockstate.h | 2 +- freebsd/sys/sys/stdint.h | 7 + freebsd/sys/sys/sx.h 
| 77 +- freebsd/sys/sys/sysctl.h | 5 +- freebsd/sys/sys/syslog.h | 2 +- freebsd/sys/sys/sysproto.h | 133 +- freebsd/sys/sys/systm.h | 103 +- freebsd/sys/sys/taskqueue.h | 3 +- freebsd/sys/sys/tty.h | 1 + freebsd/sys/sys/ttyqueue.h | 4 +- freebsd/sys/sys/ucred.h | 2 +- freebsd/sys/sys/un.h | 2 +- freebsd/sys/sys/unpcb.h | 2 +- freebsd/sys/sys/user.h | 2 +- freebsd/sys/sys/vmmeter.h | 2 +- freebsd/sys/sys/vnode.h | 2 +- freebsd/sys/vm/uma.h | 2 +- freebsd/sys/vm/uma_core.c | 45 +- freebsd/sys/vm/uma_int.h | 1 - freebsd/sys/vm/vm.h | 3 +- freebsd/sys/vm/vm_extern.h | 3 +- freebsd/sys/x86/pci/pci_bus.c | 7 +- 418 files changed, 23971 insertions(+), 30978 deletions(-) create mode 100644 freebsd/sys/crypto/intake.h create mode 100644 freebsd/sys/dev/e1000/em_txrx.c delete mode 100644 freebsd/sys/dev/e1000/if_igb.c delete mode 100644 freebsd/sys/dev/e1000/if_igb.h delete mode 100644 freebsd/sys/dev/e1000/if_lem.c delete mode 100644 freebsd/sys/dev/e1000/if_lem.h create mode 100644 freebsd/sys/dev/e1000/igb_txrx.c create mode 100644 freebsd/sys/dev/mmc/mmc_ioctl.h create mode 100644 freebsd/sys/dev/mmc/mmc_private.h create mode 100644 freebsd/sys/dev/mmc/mmc_subr.c create mode 100644 freebsd/sys/dev/mmc/mmc_subr.h create mode 100644 freebsd/sys/net/dlt.h create mode 100644 freebsd/sys/net/iflib.h create mode 100644 freebsd/sys/net80211/ieee80211_vht.c create mode 100644 freebsd/sys/net80211/ieee80211_vht.h delete mode 100644 freebsd/sys/netinet/ip_ipsec.c delete mode 100644 freebsd/sys/netinet/ip_ipsec.h delete mode 100644 freebsd/sys/netinet6/ip6_ipsec.c delete mode 100644 freebsd/sys/netinet6/ip6_ipsec.h delete mode 100644 freebsd/sys/netipsec/ipip_var.h create mode 100644 freebsd/sys/netipsec/ipsec_mod.c create mode 100644 freebsd/sys/netipsec/ipsec_pcb.c create mode 100644 freebsd/sys/netipsec/ipsec_support.h create mode 100644 freebsd/sys/netipsec/subr_ipsec.c create mode 100644 freebsd/sys/netipsec/udpencap.c delete mode 100644 freebsd/sys/netipsec/xform_ipip.c 
create mode 100644 freebsd/sys/powerpc/include/machine/intr_machdep.h create mode 100644 freebsd/sys/sys/gtaskqueue.h create mode 100644 freebsd/sys/sys/nv.h (limited to 'freebsd/sys') diff --git a/freebsd/sys/arm/include/machine/cpufunc.h b/freebsd/sys/arm/include/machine/cpufunc.h index 8a9a2a84..18c57f7f 100644 --- a/freebsd/sys/arm/include/machine/cpufunc.h +++ b/freebsd/sys/arm/include/machine/cpufunc.h @@ -49,7 +49,6 @@ #include #include -#include static __inline void breakpoint(void) @@ -278,8 +277,7 @@ void sheeva_l2cache_wbinv_all (void); #if defined(CPU_MV_PJ4B) void armv6_idcache_wbinv_all (void); #endif -#if defined(CPU_CORTEXA8) || defined(CPU_CORTEXA_MP) || \ - defined(CPU_MV_PJ4B) || defined(CPU_KRAIT) +#if defined(CPU_CORTEXA) || defined(CPU_MV_PJ4B) || defined(CPU_KRAIT) void armv7_idcache_wbinv_all (void); void armv7_cpu_sleep (int); void armv7_setup (void); diff --git a/freebsd/sys/bsm/audit.h b/freebsd/sys/bsm/audit.h index 7efc93a1..aa776057 100644 --- a/freebsd/sys/bsm/audit.h +++ b/freebsd/sys/bsm/audit.h @@ -1,7 +1,13 @@ /*- * Copyright (c) 2005-2009 Apple Inc. + * Copyright (c) 2016 Robert N. M. Watson * All rights reserved. * + * Portions of this software were developed by BAE Systems, the University of + * Cambridge Computer Laboratory, and Memorial University under DARPA/AFRL + * contract FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent + * Computing (TC) research program. + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -125,6 +131,8 @@ #define A_SETQCTRL 36 #define A_GETCOND 37 #define A_SETCOND 38 +#define A_GETEVENT 39 /* Get audit event-to-name mapping. */ +#define A_SETEVENT 40 /* Set audit event-to-name mapping. */ /* * Audit policy controls. @@ -300,6 +308,16 @@ struct au_evclass_map { }; typedef struct au_evclass_map au_evclass_map_t; +/* + * Event-to-name mapping. 
+ */ +#define EVNAMEMAP_NAME_SIZE 64 +struct au_evname_map { + au_event_t en_number; + char en_name[EVNAMEMAP_NAME_SIZE]; +}; +typedef struct au_evname_map au_evname_map_t; + /* * Audit system calls. */ diff --git a/freebsd/sys/bsm/audit_kevents.h b/freebsd/sys/bsm/audit_kevents.h index 3c16c739..fb80c124 100644 --- a/freebsd/sys/bsm/audit_kevents.h +++ b/freebsd/sys/bsm/audit_kevents.h @@ -611,6 +611,37 @@ #define AUE_BINDAT 43207 /* TrustedBSD. */ #define AUE_CONNECTAT 43208 /* TrustedBSD. */ #define AUE_CHFLAGSAT 43209 /* FreeBSD-specific. */ +#define AUE_PREADV 43210 /* FreeBSD-specific. */ +#define AUE_PWRITEV 43211 /* FreeBSD-specific. */ +#define AUE_POSIX_FALLOCATE 43212 /* FreeBSD-specific. */ +#define AUE_AIO_MLOCK 43213 /* FreeBSD-specific. */ +#define AUE_PROCCTL 43214 /* FreeBSD-specific. */ +#define AUE_AIO_READ 43215 /* FreeBSD-specific. */ +#define AUE_AIO_WRITE 43216 /* FreeBSD-specific. */ +#define AUE_AIO_RETURN 43217 /* FreeBSD-specific. */ +#define AUE_AIO_SUSPEND 43218 /* FreeBSD-specific. */ +#define AUE_AIO_CANCEL 43219 /* FreeBSD-specific. */ +#define AUE_AIO_ERROR 43220 /* FreeBSD-specific. */ +#define AUE_AIO_WAITCOMPLETE 43221 /* FreeBSD-specific. */ +#define AUE_AIO_FSYNC 43222 /* FreeBSD-specific. */ +#define AUE_THR_CREATE 43223 /* FreeBSD-specific. */ +#define AUE_THR_NEW 43224 /* FreeBSD-specific. */ +#define AUE_THR_EXIT 43225 /* FreeBSD-specific. */ +#define AUE_THR_KILL 43226 /* FreeBSD-specific. */ +#define AUE_THR_KILL2 43227 /* FreeBSD-specific. */ +#define AUE_SETFIB 43228 /* FreeBSD-specific. */ +#define AUE_LIO_LISTIO 43229 /* FreeBSD-specific. */ +#define AUE_SETUGID 43230 /* FreeBSD-specific. */ +#define AUE_SCTP_PEELOFF 43231 /* FreeBSD-specific. */ +#define AUE_SCTP_GENERIC_SENDMSG 43232 /* FreeBSD-specific. */ +#define AUE_SCTP_GENERIC_RECVMSG 43233 /* FreeBSD-specific. */ +#define AUE_JAIL_GET 43234 /* FreeBSD-specific. */ +#define AUE_JAIL_SET 43235 /* FreeBSD-specific. 
*/ +#define AUE_JAIL_REMOVE 43236 /* FreeBSD-specific. */ +#define AUE_GETLOGINCLASS 43237 /* FreeBSD-specific. */ +#define AUE_SETLOGINCLASS 43238 /* FreeBSD-specific. */ +#define AUE_POSIX_FADVISE 43239 /* FreeBSD-specific. */ +#define AUE_SCTP_GENERIC_SENDMSG_IOV 43240 /* FreeBSD-specific. */ /* * Darwin BSM uses a number of AUE_O_* definitions, which are aliased to the @@ -748,7 +779,6 @@ #define AUE_MODWATCH AUE_NULL #define AUE_MSGCL AUE_NULL #define AUE_MSYNC AUE_NULL -#define AUE_PREADV AUE_NULL #define AUE_PROCINFO AUE_NULL #define AUE_PTHREADCANCELED AUE_NULL #define AUE_PTHREADCHDIR AUE_NULL @@ -763,7 +793,6 @@ #define AUE_PTHREADMUTEXINIT AUE_NULL #define AUE_PTHREADMUTEXTRYLOCK AUE_NULL #define AUE_PTHREADMUTEXUNLOCK AUE_NULL -#define AUE_PWRITEV AUE_NULL #define AUE_REMOVEXATTR AUE_NULL #define AUE_SBRK AUE_NULL #define AUE_SELECT AUE_NULL diff --git a/freebsd/sys/cam/cam_ccb.h b/freebsd/sys/cam/cam_ccb.h index d9b91f8d..99249f43 100644 --- a/freebsd/sys/cam/cam_ccb.h +++ b/freebsd/sys/cam/cam_ccb.h @@ -803,6 +803,13 @@ struct ccb_accept_tio { struct scsi_sense_data sense_data; }; +static __inline uint8_t * +atio_cdb_ptr(struct ccb_accept_tio *ccb) +{ + return ((ccb->ccb_h.flags & CAM_CDB_POINTER) ? 
+ ccb->cdb_io.cdb_ptr : ccb->cdb_io.cdb_bytes); +} + /* Release SIM Queue */ struct ccb_relsim { struct ccb_hdr ccb_h; diff --git a/freebsd/sys/cam/cam_periph.h b/freebsd/sys/cam/cam_periph.h index d5a74a51..87f153c3 100644 --- a/freebsd/sys/cam/cam_periph.h +++ b/freebsd/sys/cam/cam_periph.h @@ -45,6 +45,7 @@ extern struct cam_periph *xpt_periph; extern struct periph_driver **periph_drivers; void periphdriver_register(void *); +int periphdriver_unregister(void *); void periphdriver_init(int level); #include @@ -56,8 +57,7 @@ void periphdriver_init(int level); periphdriver_register(data); \ break; \ case MOD_UNLOAD: \ - printf(#name " module unload - not possible for this module type\n"); \ - return EINVAL; \ + return (periphdriver_unregister(data)); \ default: \ return EOPNOTSUPP; \ } \ @@ -71,20 +71,26 @@ void periphdriver_init(int level); DECLARE_MODULE(name, name ## _mod, SI_SUB_DRIVERS, SI_ORDER_ANY); \ MODULE_DEPEND(name, cam, 1, 1, 1) -typedef void (periph_init_t)(void); /* - * Callback informing the peripheral driver - * it can perform it's initialization since - * the XPT is now fully initialized. - */ -typedef periph_init_t *periph_init_func_t; +/* + * Callback informing the peripheral driver it can perform it's + * initialization since the XPT is now fully initialized. + */ +typedef void (periph_init_t)(void); + +/* + * Callback requesting the peripheral driver to remove its instances + * and shutdown, if possible. 
+ */ +typedef int (periph_deinit_t)(void); struct periph_driver { - periph_init_func_t init; - char *driver_name; + periph_init_t *init; + char *driver_name; TAILQ_HEAD(,cam_periph) units; u_int generation; u_int flags; #define CAM_PERIPH_DRV_EARLY 0x01 + periph_deinit_t *deinit; }; typedef enum { diff --git a/freebsd/sys/cam/cam_xpt.h b/freebsd/sys/cam/cam_xpt.h index ba5c924a..8e6027e5 100644 --- a/freebsd/sys/cam/cam_xpt.h +++ b/freebsd/sys/cam/cam_xpt.h @@ -32,11 +32,16 @@ #ifndef _CAM_CAM_XPT_H #define _CAM_CAM_XPT_H 1 +#ifdef _KERNEL +#include +#endif + /* Forward Declarations */ union ccb; struct cam_periph; struct cam_ed; struct cam_sim; +struct sbuf; /* * Definition of a CAM path. Paths are created from bus, target, and lun ids @@ -102,6 +107,7 @@ void xpt_print_device(struct cam_ed *device); void xpt_print(struct cam_path *path, const char *fmt, ...); int xpt_path_string(struct cam_path *path, char *str, size_t str_len); +int xpt_path_sbuf(struct cam_path *path, struct sbuf *sb); path_id_t xpt_path_path_id(struct cam_path *path); target_id_t xpt_path_target_id(struct cam_path *path); lun_id_t xpt_path_lun_id(struct cam_path *path); diff --git a/freebsd/sys/cam/scsi/scsi_all.c b/freebsd/sys/cam/scsi/scsi_all.c index 6ebf9eec..4b767bc2 100644 --- a/freebsd/sys/cam/scsi/scsi_all.c +++ b/freebsd/sys/cam/scsi/scsi_all.c @@ -1378,7 +1378,7 @@ static struct asc_table_entry asc_table[] = { { SST(0x0E, 0x02, SS_RDEF, /* XXX TBD */ "Information unit too long") }, /* DT P R MAEBK F */ - { SST(0x0E, 0x03, SS_RDEF, /* XXX TBD */ + { SST(0x0E, 0x03, SS_FATAL | EINVAL, "Invalid field in command information unit") }, /* D W O BK */ { SST(0x10, 0x00, SS_RDEF, @@ -3624,15 +3624,9 @@ scsi_command_string(struct cam_device *device, struct ccb_scsiio *csio, #endif /* _KERNEL/!_KERNEL */ - if ((csio->ccb_h.flags & CAM_CDB_POINTER) != 0) { - sbuf_printf(sb, "%s. 
CDB: ", - scsi_op_desc(csio->cdb_io.cdb_ptr[0], inq_data)); - scsi_cdb_sbuf(csio->cdb_io.cdb_ptr, sb); - } else { - sbuf_printf(sb, "%s. CDB: ", - scsi_op_desc(csio->cdb_io.cdb_bytes[0], inq_data)); - scsi_cdb_sbuf(csio->cdb_io.cdb_bytes, sb); - } + sbuf_printf(sb, "%s. CDB: ", + scsi_op_desc(scsiio_cdb_ptr(csio)[0], inq_data)); + scsi_cdb_sbuf(scsiio_cdb_ptr(csio), sb); #ifdef _KERNEL xpt_free_ccb((union ccb *)cgd); @@ -5039,7 +5033,6 @@ scsi_sense_sbuf(struct cam_device *device, struct ccb_scsiio *csio, struct ccb_getdev *cgd; #endif /* _KERNEL */ char path_str[64]; - uint8_t *cdb; #ifndef _KERNEL if (device == NULL) @@ -5137,14 +5130,9 @@ scsi_sense_sbuf(struct cam_device *device, struct ccb_scsiio *csio, sense = &csio->sense_data; } - if (csio->ccb_h.flags & CAM_CDB_POINTER) - cdb = csio->cdb_io.cdb_ptr; - else - cdb = csio->cdb_io.cdb_bytes; - scsi_sense_only_sbuf(sense, csio->sense_len - csio->sense_resid, sb, - path_str, inq_data, cdb, csio->cdb_len); - + path_str, inq_data, scsiio_cdb_ptr(csio), csio->cdb_len); + #ifdef _KERNEL xpt_free_ccb((union ccb*)cgd); #endif /* _KERNEL/!_KERNEL */ diff --git a/freebsd/sys/cam/scsi/scsi_all.h b/freebsd/sys/cam/scsi/scsi_all.h index 64c45fb2..f85d285e 100644 --- a/freebsd/sys/cam/scsi/scsi_all.h +++ b/freebsd/sys/cam/scsi/scsi_all.h @@ -3039,7 +3039,7 @@ struct scsi_set_timestamp_parameters { uint8_t reserved1[4]; uint8_t timestamp[6]; - uint8_t reserved2[4]; + uint8_t reserved2[2]; }; struct scsi_report_timestamp_parameter_data diff --git a/freebsd/sys/crypto/intake.h b/freebsd/sys/crypto/intake.h new file mode 100644 index 00000000..bdded3a7 --- /dev/null +++ b/freebsd/sys/crypto/intake.h @@ -0,0 +1,64 @@ +/*- + * Copyright (c) 2016 Eric McCorkle + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _INTAKE_H_ +#define _INTAKE_H_ + +#include + +/* + * This file provides an interface for providing keys to the kernel + * during boot time. 
+ */ + +#define MAX_KEY_BITS 4096 +#define MAX_KEY_BYTES (MAX_KEY_BITS / NBBY) + +#define KEYBUF_SENTINEL 0xcee54b5d /* KEYS4BSD */ + +enum { + KEYBUF_TYPE_NONE, + KEYBUF_TYPE_GELI +}; + +struct keybuf_ent { + unsigned int ke_type; + char ke_data[MAX_KEY_BYTES]; +}; + +struct keybuf { + unsigned int kb_nents; + struct keybuf_ent kb_ents[]; +}; + +#ifdef _KERNEL +/* Get the key intake buffer */ +extern struct keybuf* get_keybuf(void); +#endif + +#endif diff --git a/freebsd/sys/dev/bce/if_bce.c b/freebsd/sys/dev/bce/if_bce.c index 26dacd7b..079e903d 100644 --- a/freebsd/sys/dev/bce/if_bce.c +++ b/freebsd/sys/dev/bce/if_bce.c @@ -2802,7 +2802,7 @@ bce_nvram_write(struct bce_softc *sc, u32 offset, u8 *data_buf, if (align_start || align_end) { buf = malloc(len32, M_DEVBUF, M_NOWAIT); - if (buf == 0) { + if (buf == NULL) { rc = ENOMEM; goto bce_nvram_write_exit; } diff --git a/freebsd/sys/dev/e1000/e1000_82575.c b/freebsd/sys/dev/e1000/e1000_82575.c index 83116e8e..ebf8371c 100644 --- a/freebsd/sys/dev/e1000/e1000_82575.c +++ b/freebsd/sys/dev/e1000/e1000_82575.c @@ -103,7 +103,6 @@ static s32 e1000_validate_nvm_checksum_with_offset(struct e1000_hw *hw, u16 offset); static s32 e1000_validate_nvm_checksum_i350(struct e1000_hw *hw); static s32 e1000_update_nvm_checksum_i350(struct e1000_hw *hw); -static void e1000_write_vfta_i350(struct e1000_hw *hw, u32 offset, u32 value); static void e1000_clear_vfta_i350(struct e1000_hw *hw); static void e1000_i2c_start(struct e1000_hw *hw); diff --git a/freebsd/sys/dev/e1000/e1000_82575.h b/freebsd/sys/dev/e1000/e1000_82575.h index 45fe132e..f8179560 100644 --- a/freebsd/sys/dev/e1000/e1000_82575.h +++ b/freebsd/sys/dev/e1000/e1000_82575.h @@ -493,6 +493,7 @@ enum e1000_promisc_type { void e1000_vfta_set_vf(struct e1000_hw *, u16, bool); void e1000_rlpml_set_vf(struct e1000_hw *, u16); s32 e1000_promisc_set_vf(struct e1000_hw *, enum e1000_promisc_type type); +void e1000_write_vfta_i350(struct e1000_hw *hw, u32 offset, u32 value); u16 
e1000_rxpbs_adjust_82580(u32 data); s32 e1000_read_emi_reg(struct e1000_hw *hw, u16 addr, u16 *data); s32 e1000_set_eee_i350(struct e1000_hw *hw, bool adv1G, bool adv100M); diff --git a/freebsd/sys/dev/e1000/e1000_defines.h b/freebsd/sys/dev/e1000/e1000_defines.h index e33fe0fb..4c2b0903 100644 --- a/freebsd/sys/dev/e1000/e1000_defines.h +++ b/freebsd/sys/dev/e1000/e1000_defines.h @@ -469,6 +469,8 @@ #define ETHERNET_FCS_SIZE 4 #define MAX_JUMBO_FRAME_SIZE 0x3F00 +/* The datasheet maximum supported RX size is 9.5KB (9728 bytes) */ +#define MAX_RX_JUMBO_FRAME_SIZE 0x2600 #define E1000_TX_PTR_GAP 0x1F /* Extended Configuration Control and Size */ diff --git a/freebsd/sys/dev/e1000/e1000_ich8lan.c b/freebsd/sys/dev/e1000/e1000_ich8lan.c index 007488b2..6f6cb582 100644 --- a/freebsd/sys/dev/e1000/e1000_ich8lan.c +++ b/freebsd/sys/dev/e1000/e1000_ich8lan.c @@ -245,8 +245,7 @@ static bool e1000_phy_is_accessible_pchlan(struct e1000_hw *hw) if (ret_val) return FALSE; out: - if ((hw->mac.type == e1000_pch_lpt) || - (hw->mac.type == e1000_pch_spt)) { + if (hw->mac.type >= e1000_pch_lpt) { /* Only unforce SMBus if ME is not active */ if (!(E1000_READ_REG(hw, E1000_FWSM) & E1000_ICH_FWSM_FW_VALID)) { @@ -643,7 +642,7 @@ static s32 e1000_init_nvm_params_ich8lan(struct e1000_hw *hw) nvm->type = e1000_nvm_flash_sw; - if (hw->mac.type == e1000_pch_spt) { + if (hw->mac.type >= e1000_pch_spt) { /* in SPT, gfpreg doesn't exist. NVM size is taken from the * STRAP register. This is because in SPT the GbE Flash region * is no longer accessed through the flash registers. 
Instead, @@ -703,7 +702,7 @@ static s32 e1000_init_nvm_params_ich8lan(struct e1000_hw *hw) /* Function Pointers */ nvm->ops.acquire = e1000_acquire_nvm_ich8lan; nvm->ops.release = e1000_release_nvm_ich8lan; - if (hw->mac.type == e1000_pch_spt) { + if (hw->mac.type >= e1000_pch_spt) { nvm->ops.read = e1000_read_nvm_spt; nvm->ops.update = e1000_update_nvm_checksum_spt; } else { @@ -817,8 +816,7 @@ static s32 e1000_init_mac_params_ich8lan(struct e1000_hw *hw) break; } - if ((mac->type == e1000_pch_lpt) || - (mac->type == e1000_pch_spt)) { + if (mac->type >= e1000_pch_lpt) { mac->rar_entry_count = E1000_PCH_LPT_RAR_ENTRIES; mac->ops.rar_set = e1000_rar_set_pch_lpt; mac->ops.setup_physical_interface = e1000_setup_copper_link_pch_lpt; @@ -1578,9 +1576,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) * aggressive resulting in many collisions. To avoid this, increase * the IPG and reduce Rx latency in the PHY. */ - if (((hw->mac.type == e1000_pch2lan) || - (hw->mac.type == e1000_pch_lpt) || - (hw->mac.type == e1000_pch_spt)) && link) { + if ((hw->mac.type >= e1000_pch2lan) && link) { u16 speed, duplex; e1000_get_speed_and_duplex_copper_generic(hw, &speed, &duplex); @@ -1591,7 +1587,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) tipg_reg |= 0xFF; /* Reduce Rx latency in analog PHY */ emi_val = 0; - } else if (hw->mac.type == e1000_pch_spt && + } else if (hw->mac.type >= e1000_pch_spt && duplex == FULL_DUPLEX && speed != SPEED_1000) { tipg_reg |= 0xC; emi_val = 1; @@ -1613,8 +1609,8 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) emi_addr = I217_RX_CONFIG; ret_val = e1000_write_emi_reg_locked(hw, emi_addr, emi_val); - if (hw->mac.type == e1000_pch_lpt || - hw->mac.type == e1000_pch_spt) { + + if (hw->mac.type >= e1000_pch_lpt) { u16 phy_reg; hw->phy.ops.read_reg_locked(hw, I217_PLL_CLOCK_GATE_REG, @@ -1643,7 +1639,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) if (ret_val) 
return ret_val; - if (hw->mac.type == e1000_pch_spt) { + if (hw->mac.type >= e1000_pch_spt) { u16 data; u16 ptr_gap; @@ -1692,8 +1688,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) * on power up. * Set the Beacon Duration for I217 to 8 usec */ - if ((hw->mac.type == e1000_pch_lpt) || - (hw->mac.type == e1000_pch_spt)) { + if (hw->mac.type >= e1000_pch_lpt) { u32 mac_reg; mac_reg = E1000_READ_REG(hw, E1000_FEXTNVM4); @@ -1711,8 +1706,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) if (ret_val) return ret_val; } - if ((hw->mac.type == e1000_pch_lpt) || - (hw->mac.type == e1000_pch_spt)) { + if (hw->mac.type >= e1000_pch_lpt) { /* Set platform power management values for * Latency Tolerance Reporting (LTR) * Optimized Buffer Flush/Fill (OBFF) @@ -1725,15 +1719,20 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) /* Clear link partner's EEE ability */ hw->dev_spec.ich8lan.eee_lp_ability = 0; - /* FEXTNVM6 K1-off workaround */ - if (hw->mac.type == e1000_pch_spt) { - u32 pcieanacfg = E1000_READ_REG(hw, E1000_PCIEANACFG); + if (hw->mac.type >= e1000_pch_lpt) { u32 fextnvm6 = E1000_READ_REG(hw, E1000_FEXTNVM6); - if ((pcieanacfg & E1000_FEXTNVM6_K1_OFF_ENABLE) && - (hw->dev_spec.ich8lan.disable_k1_off == FALSE)) - fextnvm6 |= E1000_FEXTNVM6_K1_OFF_ENABLE; - else + if (hw->mac.type == e1000_pch_spt) { + /* FEXTNVM6 K1-off workaround - for SPT only */ + u32 pcieanacfg = E1000_READ_REG(hw, E1000_PCIEANACFG); + + if (pcieanacfg & E1000_FEXTNVM6_K1_OFF_ENABLE) + fextnvm6 |= E1000_FEXTNVM6_K1_OFF_ENABLE; + else + fextnvm6 &= ~E1000_FEXTNVM6_K1_OFF_ENABLE; + } + + if (hw->dev_spec.ich8lan.disable_k1_off == TRUE) fextnvm6 &= ~E1000_FEXTNVM6_K1_OFF_ENABLE; E1000_WRITE_REG(hw, E1000_FEXTNVM6, fextnvm6); @@ -3673,7 +3672,7 @@ static s32 e1000_flash_cycle_init_ich8lan(struct e1000_hw *hw) /* Clear FCERR and DAEL in hw status by writing 1 */ hsfsts.hsf_status.flcerr = 1; hsfsts.hsf_status.dael = 1; - if 
(hw->mac.type == e1000_pch_spt) + if (hw->mac.type >= e1000_pch_spt) E1000_WRITE_FLASH_REG(hw, ICH_FLASH_HSFSTS, hsfsts.regval & 0xFFFF); else @@ -3693,7 +3692,7 @@ static s32 e1000_flash_cycle_init_ich8lan(struct e1000_hw *hw) * Begin by setting Flash Cycle Done. */ hsfsts.hsf_status.flcdone = 1; - if (hw->mac.type == e1000_pch_spt) + if (hw->mac.type >= e1000_pch_spt) E1000_WRITE_FLASH_REG(hw, ICH_FLASH_HSFSTS, hsfsts.regval & 0xFFFF); else @@ -3720,7 +3719,7 @@ static s32 e1000_flash_cycle_init_ich8lan(struct e1000_hw *hw) * now set the Flash Cycle Done. */ hsfsts.hsf_status.flcdone = 1; - if (hw->mac.type == e1000_pch_spt) + if (hw->mac.type >= e1000_pch_spt) E1000_WRITE_FLASH_REG(hw, ICH_FLASH_HSFSTS, hsfsts.regval & 0xFFFF); else @@ -3750,13 +3749,13 @@ static s32 e1000_flash_cycle_ich8lan(struct e1000_hw *hw, u32 timeout) DEBUGFUNC("e1000_flash_cycle_ich8lan"); /* Start a cycle by writing 1 in Flash Cycle Go in Hw Flash Control */ - if (hw->mac.type == e1000_pch_spt) + if (hw->mac.type >= e1000_pch_spt) hsflctl.regval = E1000_READ_FLASH_REG(hw, ICH_FLASH_HSFSTS)>>16; else hsflctl.regval = E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFCTL); hsflctl.hsf_ctrl.flcgo = 1; - if (hw->mac.type == e1000_pch_spt) + if (hw->mac.type >= e1000_pch_spt) E1000_WRITE_FLASH_REG(hw, ICH_FLASH_HSFSTS, hsflctl.regval << 16); else @@ -3839,7 +3838,7 @@ static s32 e1000_read_flash_byte_ich8lan(struct e1000_hw *hw, u32 offset, /* In SPT, only 32 bits access is supported, * so this function should not be called. 
*/ - if (hw->mac.type == e1000_pch_spt) + if (hw->mac.type >= e1000_pch_spt) return -E1000_ERR_NVM; else ret_val = e1000_read_flash_data_ich8lan(hw, offset, 1, &word); @@ -3948,7 +3947,7 @@ static s32 e1000_read_flash_data32_ich8lan(struct e1000_hw *hw, u32 offset, DEBUGFUNC("e1000_read_flash_data_ich8lan"); if (offset > ICH_FLASH_LINEAR_ADDR_MASK || - hw->mac.type != e1000_pch_spt) + hw->mac.type < e1000_pch_spt) return -E1000_ERR_NVM; flash_linear_addr = ((ICH_FLASH_LINEAR_ADDR_MASK & offset) + hw->nvm.flash_base_addr); @@ -4436,7 +4435,7 @@ static s32 e1000_write_flash_data_ich8lan(struct e1000_hw *hw, u32 offset, DEBUGFUNC("e1000_write_ich8_data"); - if (hw->mac.type == e1000_pch_spt) { + if (hw->mac.type >= e1000_pch_spt) { if (size != 4 || offset > ICH_FLASH_LINEAR_ADDR_MASK) return -E1000_ERR_NVM; } else { @@ -4456,7 +4455,7 @@ static s32 e1000_write_flash_data_ich8lan(struct e1000_hw *hw, u32 offset, /* In SPT, This register is in Lan memory space, not * flash. Therefore, only 32 bit access is supported */ - if (hw->mac.type == e1000_pch_spt) + if (hw->mac.type >= e1000_pch_spt) hsflctl.regval = E1000_READ_FLASH_REG(hw, ICH_FLASH_HSFSTS)>>16; else @@ -4470,7 +4469,7 @@ static s32 e1000_write_flash_data_ich8lan(struct e1000_hw *hw, u32 offset, * not flash. Therefore, only 32 bit access is * supported */ - if (hw->mac.type == e1000_pch_spt) + if (hw->mac.type >= e1000_pch_spt) E1000_WRITE_FLASH_REG(hw, ICH_FLASH_HSFSTS, hsflctl.regval << 16); else @@ -4532,7 +4531,7 @@ static s32 e1000_write_flash_data32_ich8lan(struct e1000_hw *hw, u32 offset, DEBUGFUNC("e1000_write_flash_data32_ich8lan"); - if (hw->mac.type == e1000_pch_spt) { + if (hw->mac.type >= e1000_pch_spt) { if (offset > ICH_FLASH_LINEAR_ADDR_MASK) return -E1000_ERR_NVM; } @@ -4548,7 +4547,7 @@ static s32 e1000_write_flash_data32_ich8lan(struct e1000_hw *hw, u32 offset, /* In SPT, This register is in Lan memory space, not * flash. 
Therefore, only 32 bit access is supported */ - if (hw->mac.type == e1000_pch_spt) + if (hw->mac.type >= e1000_pch_spt) hsflctl.regval = E1000_READ_FLASH_REG(hw, ICH_FLASH_HSFSTS) >> 16; @@ -4563,7 +4562,7 @@ static s32 e1000_write_flash_data32_ich8lan(struct e1000_hw *hw, u32 offset, * not flash. Therefore, only 32 bit access is * supported */ - if (hw->mac.type == e1000_pch_spt) + if (hw->mac.type >= e1000_pch_spt) E1000_WRITE_FLASH_REG(hw, ICH_FLASH_HSFSTS, hsflctl.regval << 16); else @@ -4765,7 +4764,7 @@ static s32 e1000_erase_flash_bank_ich8lan(struct e1000_hw *hw, u32 bank) /* Write a value 11 (block Erase) in Flash * Cycle field in hw flash control */ - if (hw->mac.type == e1000_pch_spt) + if (hw->mac.type >= e1000_pch_spt) hsflctl.regval = E1000_READ_FLASH_REG(hw, ICH_FLASH_HSFSTS)>>16; @@ -4775,7 +4774,7 @@ static s32 e1000_erase_flash_bank_ich8lan(struct e1000_hw *hw, u32 bank) ICH_FLASH_HSFCTL); hsflctl.hsf_ctrl.flcycle = ICH_CYCLE_ERASE; - if (hw->mac.type == e1000_pch_spt) + if (hw->mac.type >= e1000_pch_spt) E1000_WRITE_FLASH_REG(hw, ICH_FLASH_HSFSTS, hsflctl.regval << 16); else @@ -5213,8 +5212,7 @@ static void e1000_initialize_hw_bits_ich8lan(struct e1000_hw *hw) E1000_WRITE_REG(hw, E1000_RFCTL, reg); /* Enable ECC on Lynxpoint */ - if ((hw->mac.type == e1000_pch_lpt) || - (hw->mac.type == e1000_pch_spt)) { + if (hw->mac.type >= e1000_pch_lpt) { reg = E1000_READ_REG(hw, E1000_PBECCSTS); reg |= E1000_PBECCSTS_ECC_ENABLE; E1000_WRITE_REG(hw, E1000_PBECCSTS, reg); @@ -5647,7 +5645,7 @@ void e1000_suspend_workarounds_ich8lan(struct e1000_hw *hw) (device_id == E1000_DEV_ID_PCH_LPTLP_I218_V) || (device_id == E1000_DEV_ID_PCH_I218_LM3) || (device_id == E1000_DEV_ID_PCH_I218_V3) || - (hw->mac.type == e1000_pch_spt)) { + (hw->mac.type >= e1000_pch_spt)) { u32 fextnvm6 = E1000_READ_REG(hw, E1000_FEXTNVM6); E1000_WRITE_REG(hw, E1000_FEXTNVM6, diff --git a/freebsd/sys/dev/e1000/em_txrx.c b/freebsd/sys/dev/e1000/em_txrx.c new file mode 100644 index 
00000000..2183a8bd --- /dev/null +++ b/freebsd/sys/dev/e1000/em_txrx.c @@ -0,0 +1,817 @@ +#include + +/*- + * Copyright (c) 2016-2017 Matt Macy + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* $FreeBSD$ */ +#include "if_em.h" + +#ifdef RSS +#include +#include +#endif + +#ifdef VERBOSE_DEBUG +#define DPRINTF device_printf +#else +#define DPRINTF(...) 
+#endif + +/********************************************************************* + * Local Function prototypes + *********************************************************************/ +static int em_tso_setup(struct adapter *adapter, if_pkt_info_t pi, u32 *txd_upper, + u32 *txd_lower); +static int em_transmit_checksum_setup(struct adapter *adapter, if_pkt_info_t pi, + u32 *txd_upper, u32 *txd_lower); +static int em_isc_txd_encap(void *arg, if_pkt_info_t pi); +static void em_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx); +static int em_isc_txd_credits_update(void *arg, uint16_t txqid, bool clear); +static void em_isc_rxd_refill(void *arg, if_rxd_update_t iru); +static void em_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused, + qidx_t pidx); +static int em_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, + qidx_t budget); +static int em_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri); + +static void lem_isc_rxd_refill(void *arg, if_rxd_update_t iru); + +static int lem_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, + qidx_t budget); +static int lem_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri); + +static void lem_receive_checksum(int status, int errors, if_rxd_info_t ri); +static void em_receive_checksum(uint32_t status, if_rxd_info_t ri); +static int em_determine_rsstype(u32 pkt_info); +extern int em_intr(void *arg); + +struct if_txrx em_txrx = { + em_isc_txd_encap, + em_isc_txd_flush, + em_isc_txd_credits_update, + em_isc_rxd_available, + em_isc_rxd_pkt_get, + em_isc_rxd_refill, + em_isc_rxd_flush, + em_intr +}; + +struct if_txrx lem_txrx = { + em_isc_txd_encap, + em_isc_txd_flush, + em_isc_txd_credits_update, + lem_isc_rxd_available, + lem_isc_rxd_pkt_get, + lem_isc_rxd_refill, + em_isc_rxd_flush, + em_intr +}; + +extern if_shared_ctx_t em_sctx; + +void +em_dump_rs(struct adapter *adapter) +{ + if_softc_ctx_t scctx = adapter->shared; + struct em_tx_queue *que; + struct tx_ring *txr; + qidx_t i, ntxd, qid, cur; + int16_t rs_cidx; 
+ uint8_t status; + + printf("\n"); + ntxd = scctx->isc_ntxd[0]; + for (qid = 0; qid < adapter->tx_num_queues; qid++) { + que = &adapter->tx_queues[qid]; + txr = &que->txr; + rs_cidx = txr->tx_rs_cidx; + if (rs_cidx != txr->tx_rs_pidx) { + cur = txr->tx_rsq[rs_cidx]; + status = txr->tx_base[cur].upper.fields.status; + if (!(status & E1000_TXD_STAT_DD)) + printf("qid[%d]->tx_rsq[%d]: %d clear ", qid, rs_cidx, cur); + } else { + rs_cidx = (rs_cidx-1)&(ntxd-1); + cur = txr->tx_rsq[rs_cidx]; + printf("qid[%d]->tx_rsq[rs_cidx-1=%d]: %d ", qid, rs_cidx, cur); + } + printf("cidx_prev=%d rs_pidx=%d ",txr->tx_cidx_processed, txr->tx_rs_pidx); + for (i = 0; i < ntxd; i++) { + if (txr->tx_base[i].upper.fields.status & E1000_TXD_STAT_DD) + printf("%d set ", i); + } + printf("\n"); + } +} + +/********************************************************************** + * + * Setup work for hardware segmentation offload (TSO) on + * adapters using advanced tx descriptors + * + **********************************************************************/ +static int +em_tso_setup(struct adapter *adapter, if_pkt_info_t pi, u32 *txd_upper, u32 *txd_lower) +{ + if_softc_ctx_t scctx = adapter->shared; + struct em_tx_queue *que = &adapter->tx_queues[pi->ipi_qsidx]; + struct tx_ring *txr = &que->txr; + struct e1000_context_desc *TXD; + int cur, hdr_len; + + hdr_len = pi->ipi_ehdrlen + pi->ipi_ip_hlen + pi->ipi_tcp_hlen; + *txd_lower = (E1000_TXD_CMD_DEXT | /* Extended descr type */ + E1000_TXD_DTYP_D | /* Data descr type */ + E1000_TXD_CMD_TSE); /* Do TSE on this packet */ + + /* IP and/or TCP header checksum calculation and insertion. */ + *txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8; + + cur = pi->ipi_pidx; + TXD = (struct e1000_context_desc *)&txr->tx_base[cur]; + + /* + * Start offset for header checksum calculation. + * End offset for header checksum calculation. + * Offset of place put the checksum. 
+ */ + TXD->lower_setup.ip_fields.ipcss = pi->ipi_ehdrlen; + TXD->lower_setup.ip_fields.ipcse = + htole16(pi->ipi_ehdrlen + pi->ipi_ip_hlen - 1); + TXD->lower_setup.ip_fields.ipcso = pi->ipi_ehdrlen + offsetof(struct ip, ip_sum); + + /* + * Start offset for payload checksum calculation. + * End offset for payload checksum calculation. + * Offset of place to put the checksum. + */ + TXD->upper_setup.tcp_fields.tucss = pi->ipi_ehdrlen + pi->ipi_ip_hlen; + TXD->upper_setup.tcp_fields.tucse = 0; + TXD->upper_setup.tcp_fields.tucso = + pi->ipi_ehdrlen + pi->ipi_ip_hlen + offsetof(struct tcphdr, th_sum); + + /* + * Payload size per packet w/o any headers. + * Length of all headers up to payload. + */ + TXD->tcp_seg_setup.fields.mss = htole16(pi->ipi_tso_segsz); + TXD->tcp_seg_setup.fields.hdr_len = hdr_len; + + TXD->cmd_and_length = htole32(adapter->txd_cmd | + E1000_TXD_CMD_DEXT | /* Extended descr */ + E1000_TXD_CMD_TSE | /* TSE context */ + E1000_TXD_CMD_IP | /* Do IP csum */ + E1000_TXD_CMD_TCP | /* Do TCP checksum */ + (pi->ipi_len - hdr_len)); /* Total len */ + txr->tx_tso = TRUE; + + if (++cur == scctx->isc_ntxd[0]) { + cur = 0; + } + DPRINTF(iflib_get_dev(adapter->ctx), "%s: pidx: %d cur: %d\n", __FUNCTION__, pi->ipi_pidx, cur); + return (cur); +} + +#define TSO_WORKAROUND 4 +#define DONT_FORCE_CTX 1 + + +/********************************************************************* + * The offload context is protocol specific (TCP/UDP) and thus + * only needs to be set when the protocol changes. The occasion + * of a context change can be a performance detriment, and + * might be better just disabled. The reason arises in the way + * in which the controller supports pipelined requests from the + * Tx data DMA. Up to four requests can be pipelined, and they may + * belong to the same packet or to multiple packets. 
However all + * requests for one packet are issued before a request is issued + * for a subsequent packet and if a request for the next packet + * requires a context change, that request will be stalled + * until the previous request completes. This means setting up + * a new context effectively disables pipelined Tx data DMA which + * in turn greatly slow down performance to send small sized + * frames. + **********************************************************************/ + +static int +em_transmit_checksum_setup(struct adapter *adapter, if_pkt_info_t pi, u32 *txd_upper, u32 *txd_lower) +{ + struct e1000_context_desc *TXD = NULL; + if_softc_ctx_t scctx = adapter->shared; + struct em_tx_queue *que = &adapter->tx_queues[pi->ipi_qsidx]; + struct tx_ring *txr = &que->txr; + int csum_flags = pi->ipi_csum_flags; + int cur, hdr_len; + u32 cmd; + + cur = pi->ipi_pidx; + hdr_len = pi->ipi_ehdrlen + pi->ipi_ip_hlen; + cmd = adapter->txd_cmd; + + /* + * The 82574L can only remember the *last* context used + * regardless of queue that it was use for. We cannot reuse + * contexts on this hardware platform and must generate a new + * context every time. 82574L hardware spec, section 7.2.6, + * second note. + */ + if (DONT_FORCE_CTX && + adapter->tx_num_queues == 1 && + txr->csum_lhlen == pi->ipi_ehdrlen && + txr->csum_iphlen == pi->ipi_ip_hlen && + txr->csum_flags == csum_flags) { + /* + * Same csum offload context as the previous packets; + * just return. + */ + *txd_upper = txr->csum_txd_upper; + *txd_lower = txr->csum_txd_lower; + return (cur); + } + + TXD = (struct e1000_context_desc *)&txr->tx_base[cur]; + if (csum_flags & CSUM_IP) { + *txd_upper |= E1000_TXD_POPTS_IXSM << 8; + /* + * Start offset for header checksum calculation. + * End offset for header checksum calculation. + * Offset of place to put the checksum. 
+ */ + TXD->lower_setup.ip_fields.ipcss = pi->ipi_ehdrlen; + TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len); + TXD->lower_setup.ip_fields.ipcso = pi->ipi_ehdrlen + offsetof(struct ip, ip_sum); + cmd |= E1000_TXD_CMD_IP; + } + + if (csum_flags & (CSUM_TCP|CSUM_UDP)) { + uint8_t tucso; + + *txd_upper |= E1000_TXD_POPTS_TXSM << 8; + *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D; + + if (csum_flags & CSUM_TCP) { + tucso = hdr_len + offsetof(struct tcphdr, th_sum); + cmd |= E1000_TXD_CMD_TCP; + } else + tucso = hdr_len + offsetof(struct udphdr, uh_sum); + TXD->upper_setup.tcp_fields.tucss = hdr_len; + TXD->upper_setup.tcp_fields.tucse = htole16(0); + TXD->upper_setup.tcp_fields.tucso = tucso; + } + + txr->csum_lhlen = pi->ipi_ehdrlen; + txr->csum_iphlen = pi->ipi_ip_hlen; + txr->csum_flags = csum_flags; + txr->csum_txd_upper = *txd_upper; + txr->csum_txd_lower = *txd_lower; + + TXD->tcp_seg_setup.data = htole32(0); + TXD->cmd_and_length = + htole32(E1000_TXD_CMD_IFCS | E1000_TXD_CMD_DEXT | cmd); + + if (++cur == scctx->isc_ntxd[0]) { + cur = 0; + } + DPRINTF(iflib_get_dev(adapter->ctx), "checksum_setup csum_flags=%x txd_upper=%x txd_lower=%x hdr_len=%d cmd=%x\n", + csum_flags, *txd_upper, *txd_lower, hdr_len, cmd); + return (cur); +} + +static int +em_isc_txd_encap(void *arg, if_pkt_info_t pi) +{ + struct adapter *sc = arg; + if_softc_ctx_t scctx = sc->shared; + struct em_tx_queue *que = &sc->tx_queues[pi->ipi_qsidx]; + struct tx_ring *txr = &que->txr; + bus_dma_segment_t *segs = pi->ipi_segs; + int nsegs = pi->ipi_nsegs; + int csum_flags = pi->ipi_csum_flags; + int i, j, first, pidx_last; + u32 txd_flags, txd_upper = 0, txd_lower = 0; + + struct e1000_tx_desc *ctxd = NULL; + bool do_tso, tso_desc; + qidx_t ntxd; + + txd_flags = pi->ipi_flags & IPI_TX_INTR ? 
E1000_TXD_CMD_RS : 0; + i = first = pi->ipi_pidx; + do_tso = (csum_flags & CSUM_TSO); + tso_desc = FALSE; + ntxd = scctx->isc_ntxd[0]; + /* + * TSO Hardware workaround, if this packet is not + * TSO, and is only a single descriptor long, and + * it follows a TSO burst, then we need to add a + * sentinel descriptor to prevent premature writeback. + */ + if ((!do_tso) && (txr->tx_tso == TRUE)) { + if (nsegs == 1) + tso_desc = TRUE; + txr->tx_tso = FALSE; + } + + /* Do hardware assists */ + if (do_tso) { + i = em_tso_setup(sc, pi, &txd_upper, &txd_lower); + tso_desc = TRUE; + } else if (csum_flags & EM_CSUM_OFFLOAD) { + i = em_transmit_checksum_setup(sc, pi, &txd_upper, &txd_lower); + } + + if (pi->ipi_mflags & M_VLANTAG) { + /* Set the vlan id. */ + txd_upper |= htole16(pi->ipi_vtag) << 16; + /* Tell hardware to add tag */ + txd_lower |= htole32(E1000_TXD_CMD_VLE); + } + + DPRINTF(iflib_get_dev(sc->ctx), "encap: set up tx: nsegs=%d first=%d i=%d\n", nsegs, first, i); + /* XXX adapter->pcix_82544 -- lem_fill_descriptors */ + + /* Set up our transmit descriptors */ + for (j = 0; j < nsegs; j++) { + bus_size_t seg_len; + bus_addr_t seg_addr; + uint32_t cmd; + + ctxd = &txr->tx_base[i]; + seg_addr = segs[j].ds_addr; + seg_len = segs[j].ds_len; + cmd = E1000_TXD_CMD_IFCS | sc->txd_cmd; + + /* + * TSO Workaround: + * If this is the last descriptor, we want to + * split it so we have a small final sentinel + */ + if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) { + seg_len -= TSO_WORKAROUND; + ctxd->buffer_addr = htole64(seg_addr); + ctxd->lower.data = htole32(cmd | txd_lower | seg_len); + ctxd->upper.data = htole32(txd_upper); + + if (++i == scctx->isc_ntxd[0]) + i = 0; + + /* Now make the sentinel */ + ctxd = &txr->tx_base[i]; + ctxd->buffer_addr = htole64(seg_addr + seg_len); + ctxd->lower.data = htole32(cmd | txd_lower | TSO_WORKAROUND); + ctxd->upper.data = htole32(txd_upper); + pidx_last = i; + if (++i == scctx->isc_ntxd[0]) + i = 0; + 
DPRINTF(iflib_get_dev(sc->ctx), "TSO path pidx_last=%d i=%d ntxd[0]=%d\n", pidx_last, i, scctx->isc_ntxd[0]); + } else { + ctxd->buffer_addr = htole64(seg_addr); + ctxd->lower.data = htole32(cmd | txd_lower | seg_len); + ctxd->upper.data = htole32(txd_upper); + pidx_last = i; + if (++i == scctx->isc_ntxd[0]) + i = 0; + DPRINTF(iflib_get_dev(sc->ctx), "pidx_last=%d i=%d ntxd[0]=%d\n", pidx_last, i, scctx->isc_ntxd[0]); + } + } + + /* + * Last Descriptor of Packet + * needs End Of Packet (EOP) + * and Report Status (RS) + */ + if (txd_flags) { + txr->tx_rsq[txr->tx_rs_pidx] = pidx_last; + DPRINTF(iflib_get_dev(sc->ctx), "setting to RS on %d rs_pidx %d first: %d\n", pidx_last, txr->tx_rs_pidx, first); + txr->tx_rs_pidx = (txr->tx_rs_pidx+1) & (ntxd-1); + MPASS(txr->tx_rs_pidx != txr->tx_rs_cidx); + } + ctxd->lower.data |= htole32(E1000_TXD_CMD_EOP | txd_flags); + DPRINTF(iflib_get_dev(sc->ctx), "tx_buffers[%d]->eop = %d ipi_new_pidx=%d\n", first, pidx_last, i); + pi->ipi_new_pidx = i; + + return (0); +} + +static void +em_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx) +{ + struct adapter *adapter = arg; + struct em_tx_queue *que = &adapter->tx_queues[txqid]; + struct tx_ring *txr = &que->txr; + + E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), pidx); +} + +static int +em_isc_txd_credits_update(void *arg, uint16_t txqid, bool clear) +{ + struct adapter *adapter = arg; + if_softc_ctx_t scctx = adapter->shared; + struct em_tx_queue *que = &adapter->tx_queues[txqid]; + struct tx_ring *txr = &que->txr; + + qidx_t processed = 0; + int updated; + qidx_t cur, prev, ntxd, rs_cidx; + int32_t delta; + uint8_t status; + + rs_cidx = txr->tx_rs_cidx; + if (rs_cidx == txr->tx_rs_pidx) + return (0); + cur = txr->tx_rsq[rs_cidx]; + MPASS(cur != QIDX_INVALID); + status = txr->tx_base[cur].upper.fields.status; + updated = !!(status & E1000_TXD_STAT_DD); + + if (clear == false || updated == 0) + return (updated); + + prev = txr->tx_cidx_processed; + ntxd = scctx->isc_ntxd[0]; + 
do { + delta = (int32_t)cur - (int32_t)prev; + MPASS(prev == 0 || delta != 0); + if (delta < 0) + delta += ntxd; + DPRINTF(iflib_get_dev(adapter->ctx), + "%s: cidx_processed=%u cur=%u clear=%d delta=%d\n", + __FUNCTION__, prev, cur, clear, delta); + + processed += delta; + prev = cur; + rs_cidx = (rs_cidx + 1) & (ntxd-1); + if (rs_cidx == txr->tx_rs_pidx) + break; + cur = txr->tx_rsq[rs_cidx]; + MPASS(cur != QIDX_INVALID); + status = txr->tx_base[cur].upper.fields.status; + } while ((status & E1000_TXD_STAT_DD)); + + txr->tx_rs_cidx = rs_cidx; + txr->tx_cidx_processed = prev; + return(processed); +} + +static void +lem_isc_rxd_refill(void *arg, if_rxd_update_t iru) +{ + struct adapter *sc = arg; + if_softc_ctx_t scctx = sc->shared; + struct em_rx_queue *que = &sc->rx_queues[iru->iru_qsidx]; + struct rx_ring *rxr = &que->rxr; + struct e1000_rx_desc *rxd; + uint64_t *paddrs; + uint32_t next_pidx, pidx; + uint16_t count; + int i; + + paddrs = iru->iru_paddrs; + pidx = iru->iru_pidx; + count = iru->iru_count; + + for (i = 0, next_pidx = pidx; i < count; i++) { + rxd = (struct e1000_rx_desc *)&rxr->rx_base[next_pidx]; + rxd->buffer_addr = htole64(paddrs[i]); + /* status bits must be cleared */ + rxd->status = 0; + + if (++next_pidx == scctx->isc_nrxd[0]) + next_pidx = 0; + } +} + +static void +em_isc_rxd_refill(void *arg, if_rxd_update_t iru) +{ + struct adapter *sc = arg; + if_softc_ctx_t scctx = sc->shared; + uint16_t rxqid = iru->iru_qsidx; + struct em_rx_queue *que = &sc->rx_queues[rxqid]; + struct rx_ring *rxr = &que->rxr; + union e1000_rx_desc_extended *rxd; + uint64_t *paddrs; + uint32_t next_pidx, pidx; + uint16_t count; + int i; + + paddrs = iru->iru_paddrs; + pidx = iru->iru_pidx; + count = iru->iru_count; + + for (i = 0, next_pidx = pidx; i < count; i++) { + rxd = &rxr->rx_base[next_pidx]; + rxd->read.buffer_addr = htole64(paddrs[i]); + /* DD bits must be cleared */ + rxd->wb.upper.status_error = 0; + + if (++next_pidx == scctx->isc_nrxd[0]) + next_pidx = 0; 
+ } +} + +static void +em_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused, qidx_t pidx) +{ + struct adapter *sc = arg; + struct em_rx_queue *que = &sc->rx_queues[rxqid]; + struct rx_ring *rxr = &que->rxr; + + E1000_WRITE_REG(&sc->hw, E1000_RDT(rxr->me), pidx); +} + +static int +lem_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget) +{ + struct adapter *sc = arg; + if_softc_ctx_t scctx = sc->shared; + struct em_rx_queue *que = &sc->rx_queues[rxqid]; + struct rx_ring *rxr = &que->rxr; + struct e1000_rx_desc *rxd; + u32 staterr = 0; + int cnt, i; + + if (budget == 1) { + rxd = (struct e1000_rx_desc *)&rxr->rx_base[idx]; + staterr = rxd->status; + return (staterr & E1000_RXD_STAT_DD); + } + + for (cnt = 0, i = idx; cnt < scctx->isc_nrxd[0] && cnt <= budget;) { + rxd = (struct e1000_rx_desc *)&rxr->rx_base[i]; + staterr = rxd->status; + + if ((staterr & E1000_RXD_STAT_DD) == 0) + break; + + if (++i == scctx->isc_nrxd[0]) + i = 0; + + if (staterr & E1000_RXD_STAT_EOP) + cnt++; + } + return (cnt); +} + +static int +em_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget) +{ + struct adapter *sc = arg; + if_softc_ctx_t scctx = sc->shared; + struct em_rx_queue *que = &sc->rx_queues[rxqid]; + struct rx_ring *rxr = &que->rxr; + union e1000_rx_desc_extended *rxd; + u32 staterr = 0; + int cnt, i; + + if (budget == 1) { + rxd = &rxr->rx_base[idx]; + staterr = le32toh(rxd->wb.upper.status_error); + return (staterr & E1000_RXD_STAT_DD); + } + + for (cnt = 0, i = idx; cnt < scctx->isc_nrxd[0] && cnt <= budget;) { + rxd = &rxr->rx_base[i]; + staterr = le32toh(rxd->wb.upper.status_error); + + if ((staterr & E1000_RXD_STAT_DD) == 0) + break; + + if (++i == scctx->isc_nrxd[0]) { + i = 0; + } + + if (staterr & E1000_RXD_STAT_EOP) + cnt++; + + } + return (cnt); +} + +static int +lem_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri) +{ + struct adapter *adapter = arg; + if_softc_ctx_t scctx = adapter->shared; + struct em_rx_queue *que = 
&adapter->rx_queues[ri->iri_qsidx]; + struct rx_ring *rxr = &que->rxr; + struct e1000_rx_desc *rxd; + u16 len; + u32 status, errors; + bool eop; + int i, cidx; + + status = errors = i = 0; + cidx = ri->iri_cidx; + + do { + rxd = (struct e1000_rx_desc *)&rxr->rx_base[cidx]; + status = rxd->status; + errors = rxd->errors; + + /* Error Checking then decrement count */ + MPASS ((status & E1000_RXD_STAT_DD) != 0); + + len = le16toh(rxd->length); + ri->iri_len += len; + + eop = (status & E1000_RXD_STAT_EOP) != 0; + + /* Make sure bad packets are discarded */ + if (errors & E1000_RXD_ERR_FRAME_ERR_MASK) { + adapter->dropped_pkts++; + /* XXX fixup if common */ + return (EBADMSG); + } + + ri->iri_frags[i].irf_flid = 0; + ri->iri_frags[i].irf_idx = cidx; + ri->iri_frags[i].irf_len = len; + /* Zero out the receive descriptors status. */ + rxd->status = 0; + + if (++cidx == scctx->isc_nrxd[0]) + cidx = 0; + i++; + } while (!eop); + + /* XXX add a faster way to look this up */ + if (adapter->hw.mac.type >= e1000_82543 && !(status & E1000_RXD_STAT_IXSM)) + lem_receive_checksum(status, errors, ri); + + if (status & E1000_RXD_STAT_VP) { + ri->iri_vtag = le16toh(rxd->special); + ri->iri_flags |= M_VLANTAG; + } + + ri->iri_nfrags = i; + + return (0); +} + +static int +em_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri) +{ + struct adapter *adapter = arg; + if_softc_ctx_t scctx = adapter->shared; + struct em_rx_queue *que = &adapter->rx_queues[ri->iri_qsidx]; + struct rx_ring *rxr = &que->rxr; + union e1000_rx_desc_extended *rxd; + + u16 len; + u32 pkt_info; + u32 staterr = 0; + bool eop; + int i, cidx, vtag; + + i = vtag = 0; + cidx = ri->iri_cidx; + + do { + rxd = &rxr->rx_base[cidx]; + staterr = le32toh(rxd->wb.upper.status_error); + pkt_info = le32toh(rxd->wb.lower.mrq); + + /* Error Checking then decrement count */ + MPASS ((staterr & E1000_RXD_STAT_DD) != 0); + + len = le16toh(rxd->wb.upper.length); + ri->iri_len += len; + + eop = (staterr & E1000_RXD_STAT_EOP) != 0; + + /* Make 
sure bad packets are discarded */ + if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) { + adapter->dropped_pkts++; + return EBADMSG; + } + + ri->iri_frags[i].irf_flid = 0; + ri->iri_frags[i].irf_idx = cidx; + ri->iri_frags[i].irf_len = len; + /* Zero out the receive descriptors status. */ + rxd->wb.upper.status_error &= htole32(~0xFF); + + if (++cidx == scctx->isc_nrxd[0]) + cidx = 0; + i++; + } while (!eop); + + /* XXX add a faster way to look this up */ + if (adapter->hw.mac.type >= e1000_82543) + em_receive_checksum(staterr, ri); + + if (staterr & E1000_RXD_STAT_VP) { + vtag = le16toh(rxd->wb.upper.vlan); + } + + ri->iri_vtag = vtag; + if (vtag) + ri->iri_flags |= M_VLANTAG; + + ri->iri_flowid = le32toh(rxd->wb.lower.hi_dword.rss); + ri->iri_rsstype = em_determine_rsstype(pkt_info); + + ri->iri_nfrags = i; + return (0); +} + +/********************************************************************* + * + * Verify that the hardware indicated that the checksum is valid. + * Inform the stack about the status of checksum so that stack + * doesn't spend time verifying the checksum. + * + *********************************************************************/ +static void +lem_receive_checksum(int status, int errors, if_rxd_info_t ri) +{ + /* Did it pass? */ + if (status & E1000_RXD_STAT_IPCS && !(errors & E1000_RXD_ERR_IPE)) + ri->iri_csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID); + + if (status & E1000_RXD_STAT_TCPCS) { + /* Did it pass? 
*/ + if (!(errors & E1000_RXD_ERR_TCPE)) { + ri->iri_csum_flags |= + (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); + ri->iri_csum_data = htons(0xffff); + } + } +} + +/******************************************************************** + * + * Parse the packet type to determine the appropriate hash + * + ******************************************************************/ +static int +em_determine_rsstype(u32 pkt_info) +{ + switch (pkt_info & E1000_RXDADV_RSSTYPE_MASK) { + case E1000_RXDADV_RSSTYPE_IPV4_TCP: + return M_HASHTYPE_RSS_TCP_IPV4; + case E1000_RXDADV_RSSTYPE_IPV4: + return M_HASHTYPE_RSS_IPV4; + case E1000_RXDADV_RSSTYPE_IPV6_TCP: + return M_HASHTYPE_RSS_TCP_IPV6; + case E1000_RXDADV_RSSTYPE_IPV6_EX: + return M_HASHTYPE_RSS_IPV6_EX; + case E1000_RXDADV_RSSTYPE_IPV6: + return M_HASHTYPE_RSS_IPV6; + case E1000_RXDADV_RSSTYPE_IPV6_TCP_EX: + return M_HASHTYPE_RSS_TCP_IPV6_EX; + default: + return M_HASHTYPE_OPAQUE; + } +} + +static void +em_receive_checksum(uint32_t status, if_rxd_info_t ri) +{ + ri->iri_csum_flags = 0; + + /* Ignore Checksum bit is set */ + if (status & E1000_RXD_STAT_IXSM) + return; + + /* If the IP checksum exists and there is no IP Checksum error */ + if ((status & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) == + E1000_RXD_STAT_IPCS) { + ri->iri_csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID); + } + + /* TCP or UDP checksum */ + if ((status & (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) == + E1000_RXD_STAT_TCPCS) { + ri->iri_csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); + ri->iri_csum_data = htons(0xffff); + } + if (status & E1000_RXD_STAT_UDPCS) { + ri->iri_csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); + ri->iri_csum_data = htons(0xffff); + } +} diff --git a/freebsd/sys/dev/e1000/if_em.c b/freebsd/sys/dev/e1000/if_em.c index d8c1e5a6..2054f994 100644 --- a/freebsd/sys/dev/e1000/if_em.c +++ b/freebsd/sys/dev/e1000/if_em.c @@ -1,101 +1,38 @@ #include 
-/****************************************************************************** - - Copyright (c) 2001-2015, Intel Corporation - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - 3. Neither the name of the Intel Corporation nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE. - -******************************************************************************/ -/*$FreeBSD$*/ - -#include -#include -#include -#include - -#ifdef HAVE_KERNEL_OPTION_HEADERS -#include -#endif +/*- + * Copyright (c) 2016 Matt Macy + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ -#include -#include -#ifdef DDB -#include -#include -#endif -#if __FreeBSD_version >= 800000 -#include -#endif -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "e1000_api.h" -#include "e1000_82571.h" +/* $FreeBSD$ */ #include "if_em.h" +#include +#include + +#define em_mac_min e1000_82547 +#define igb_mac_min e1000_82575 /********************************************************************* * Driver version: @@ -112,187 +49,224 @@ char em_driver_version[] = "7.6.1-k"; * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index } *********************************************************************/ -static em_vendor_info_t em_vendor_info_array[] = +static pci_vendor_info_t em_vendor_info_array[] = { - /* Intel(R) PRO/1000 Network Connection */ - { 0x8086, E1000_DEV_ID_82571EB_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82571EB_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82571EB_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL, - PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD, - PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER, - PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP, - PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER, - PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER, - PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82572EI_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82572EI_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82572EI_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, 
E1000_DEV_ID_82572EI, PCI_ANY_ID, PCI_ANY_ID, 0}, - - { 0x8086, E1000_DEV_ID_82573E, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82573E_IAMT, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82573L, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82583V, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT, - PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT, - PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT, - PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT, - PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_ICH8_IGP_AMT, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_ICH8_IGP_C, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_ICH8_IFE, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_ICH8_IFE_GT, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_ICH8_IFE_G, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_ICH8_IGP_M, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_ICH8_82567V_3, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_ICH9_IGP_AMT, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_ICH9_IGP_C, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_ICH9_IGP_M, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_ICH9_IGP_M_V, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_ICH9_IFE, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_ICH9_IFE_GT, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_ICH9_IFE_G, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_ICH9_BM, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82574L, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82574LA, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_ICH10_R_BM_LM, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_ICH10_R_BM_LF, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 
0x8086, E1000_DEV_ID_ICH10_R_BM_V, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_ICH10_D_BM_LM, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_ICH10_D_BM_LF, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_ICH10_D_BM_V, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_PCH_M_HV_LM, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_PCH_M_HV_LC, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_PCH_D_HV_DM, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_PCH_D_HV_DC, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_PCH2_LV_LM, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_PCH2_LV_V, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_PCH_LPT_I217_V, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM, - PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V, - PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_PCH_I218_LM2, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_PCH_I218_V2, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_PCH_I218_LM3, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_PCH_I218_V3, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM2, - PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V2, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_PCH_LBG_I219_LM3, - PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM4, - PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V4, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM5, - PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V5, PCI_ANY_ID, PCI_ANY_ID, 0}, + /* Intel(R) PRO/1000 Network Connection - Legacy em*/ + PVID(0x8086, E1000_DEV_ID_82540EM, "Intel(R) PRO/1000 Network Connection"), + 
PVID(0x8086, E1000_DEV_ID_82540EM_LOM, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_82540EP, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_82540EP_LOM, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_82540EP_LP, "Intel(R) PRO/1000 Network Connection"), + + PVID(0x8086, E1000_DEV_ID_82541EI, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_82541ER, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_82541ER_LOM, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_82541EI_MOBILE, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_82541GI, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_82541GI_LF, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_82541GI_MOBILE, "Intel(R) PRO/1000 Network Connection"), + + PVID(0x8086, E1000_DEV_ID_82542, "Intel(R) PRO/1000 Network Connection"), + + PVID(0x8086, E1000_DEV_ID_82543GC_FIBER, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_82543GC_COPPER, "Intel(R) PRO/1000 Network Connection"), + + PVID(0x8086, E1000_DEV_ID_82544EI_COPPER, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_82544EI_FIBER, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_82544GC_COPPER, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_82544GC_LOM, "Intel(R) PRO/1000 Network Connection"), + + PVID(0x8086, E1000_DEV_ID_82545EM_COPPER, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_82545EM_FIBER, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_82545GM_COPPER, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_82545GM_FIBER, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_82545GM_SERDES, "Intel(R) PRO/1000 Network Connection"), + + PVID(0x8086, E1000_DEV_ID_82546EB_COPPER, "Intel(R) PRO/1000 Network Connection"), + 
PVID(0x8086, E1000_DEV_ID_82546EB_FIBER, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_82546EB_QUAD_COPPER, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_82546GB_COPPER, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_82546GB_FIBER, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_82546GB_SERDES, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_82546GB_PCIE, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3, "Intel(R) PRO/1000 Network Connection"), + + PVID(0x8086, E1000_DEV_ID_82547EI, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_82547EI_MOBILE, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_82547GI, "Intel(R) PRO/1000 Network Connection"), + + /* Intel(R) PRO/1000 Network Connection - em */ + PVID(0x8086, E1000_DEV_ID_82571EB_COPPER, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_82571EB_FIBER, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_82571EB_SERDES, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_82572EI, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_82572EI_COPPER, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_82572EI_FIBER, 
"Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_82572EI_SERDES, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_82573E, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_82573E_IAMT, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_82573L, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_82583V, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_ICH8_IGP_AMT, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_ICH8_IGP_C, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_ICH8_IFE, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_ICH8_IFE_GT, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_ICH8_IFE_G, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_ICH8_IGP_M, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_ICH8_82567V_3, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_ICH9_IGP_AMT, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_ICH9_IGP_C, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_ICH9_IGP_M, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_ICH9_IGP_M_V, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_ICH9_IFE, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_ICH9_IFE_GT, 
"Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_ICH9_IFE_G, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_ICH9_BM, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_82574L, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_82574LA, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_ICH10_R_BM_LM, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_ICH10_R_BM_LF, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_ICH10_R_BM_V, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_ICH10_D_BM_LM, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_ICH10_D_BM_LF, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_ICH10_D_BM_V, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_PCH_M_HV_LM, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_PCH_M_HV_LC, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_PCH_D_HV_DM, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_PCH_D_HV_DC, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_PCH2_LV_LM, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_PCH2_LV_V, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_PCH_LPT_I217_LM, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_PCH_LPT_I217_V, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_PCH_I218_LM2, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_PCH_I218_V2, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_PCH_I218_LM3, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_PCH_I218_V3, "Intel(R) PRO/1000 
Network Connection"), + PVID(0x8086, E1000_DEV_ID_PCH_SPT_I219_LM, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_PCH_SPT_I219_V, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_PCH_SPT_I219_LM2, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_PCH_SPT_I219_V2, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_PCH_LBG_I219_LM3, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_PCH_SPT_I219_LM4, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_PCH_SPT_I219_V4, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_PCH_SPT_I219_LM5, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_PCH_SPT_I219_V5, "Intel(R) PRO/1000 Network Connection"), /* required last entry */ - { 0, 0, 0, 0, 0} + PVID_END }; -/********************************************************************* - * Table of branding strings for all supported NICs. - *********************************************************************/ - -static char *em_strings[] = { - "Intel(R) PRO/1000 Network Connection" +static pci_vendor_info_t igb_vendor_info_array[] = +{ + /* Intel(R) PRO/1000 Network Connection - igb */ + PVID(0x8086, E1000_DEV_ID_82575EB_COPPER, "Intel(R) PRO/1000 PCI-Express Network Driver"), + PVID(0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES, "Intel(R) PRO/1000 PCI-Express Network Driver"), + PVID(0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER, "Intel(R) PRO/1000 PCI-Express Network Driver"), + PVID(0x8086, E1000_DEV_ID_82576, "Intel(R) PRO/1000 PCI-Express Network Driver"), + PVID(0x8086, E1000_DEV_ID_82576_NS, "Intel(R) PRO/1000 PCI-Express Network Driver"), + PVID(0x8086, E1000_DEV_ID_82576_NS_SERDES, "Intel(R) PRO/1000 PCI-Express Network Driver"), + PVID(0x8086, E1000_DEV_ID_82576_FIBER, "Intel(R) PRO/1000 PCI-Express Network Driver"), + PVID(0x8086, E1000_DEV_ID_82576_SERDES, "Intel(R) PRO/1000 PCI-Express Network Driver"), + PVID(0x8086, 
E1000_DEV_ID_82576_SERDES_QUAD, "Intel(R) PRO/1000 PCI-Express Network Driver"), + PVID(0x8086, E1000_DEV_ID_82576_QUAD_COPPER, "Intel(R) PRO/1000 PCI-Express Network Driver"), + PVID(0x8086, E1000_DEV_ID_82576_QUAD_COPPER_ET2, "Intel(R) PRO/1000 PCI-Express Network Driver"), + PVID(0x8086, E1000_DEV_ID_82576_VF, "Intel(R) PRO/1000 PCI-Express Network Driver"), + PVID(0x8086, E1000_DEV_ID_82580_COPPER, "Intel(R) PRO/1000 PCI-Express Network Driver"), + PVID(0x8086, E1000_DEV_ID_82580_FIBER, "Intel(R) PRO/1000 PCI-Express Network Driver"), + PVID(0x8086, E1000_DEV_ID_82580_SERDES, "Intel(R) PRO/1000 PCI-Express Network Driver"), + PVID(0x8086, E1000_DEV_ID_82580_SGMII, "Intel(R) PRO/1000 PCI-Express Network Driver"), + PVID(0x8086, E1000_DEV_ID_82580_COPPER_DUAL, "Intel(R) PRO/1000 PCI-Express Network Driver"), + PVID(0x8086, E1000_DEV_ID_82580_QUAD_FIBER, "Intel(R) PRO/1000 PCI-Express Network Driver"), + PVID(0x8086, E1000_DEV_ID_DH89XXCC_SERDES, "Intel(R) PRO/1000 PCI-Express Network Driver"), + PVID(0x8086, E1000_DEV_ID_DH89XXCC_SGMII, "Intel(R) PRO/1000 PCI-Express Network Driver"), + PVID(0x8086, E1000_DEV_ID_DH89XXCC_SFP, "Intel(R) PRO/1000 PCI-Express Network Driver"), + PVID(0x8086, E1000_DEV_ID_DH89XXCC_BACKPLANE, "Intel(R) PRO/1000 PCI-Express Network Driver"), + PVID(0x8086, E1000_DEV_ID_I350_COPPER, "Intel(R) PRO/1000 PCI-Express Network Driver"), + PVID(0x8086, E1000_DEV_ID_I350_FIBER, "Intel(R) PRO/1000 PCI-Express Network Driver"), + PVID(0x8086, E1000_DEV_ID_I350_SERDES, "Intel(R) PRO/1000 PCI-Express Network Driver"), + PVID(0x8086, E1000_DEV_ID_I350_SGMII, "Intel(R) PRO/1000 PCI-Express Network Driver"), + PVID(0x8086, E1000_DEV_ID_I350_VF, "Intel(R) PRO/1000 PCI-Express Network Driver"), + PVID(0x8086, E1000_DEV_ID_I210_COPPER, "Intel(R) PRO/1000 PCI-Express Network Driver"), + PVID(0x8086, E1000_DEV_ID_I210_COPPER_IT, "Intel(R) PRO/1000 PCI-Express Network Driver"), + PVID(0x8086, E1000_DEV_ID_I210_COPPER_OEM1, "Intel(R) PRO/1000 PCI-Express 
Network Driver"), + PVID(0x8086, E1000_DEV_ID_I210_COPPER_FLASHLESS, "Intel(R) PRO/1000 PCI-Express Network Driver"), + PVID(0x8086, E1000_DEV_ID_I210_SERDES_FLASHLESS, "Intel(R) PRO/1000 PCI-Express Network Driver"), + PVID(0x8086, E1000_DEV_ID_I210_FIBER, "Intel(R) PRO/1000 PCI-Express Network Driver"), + PVID(0x8086, E1000_DEV_ID_I210_SERDES, "Intel(R) PRO/1000 PCI-Express Network Driver"), + PVID(0x8086, E1000_DEV_ID_I210_SGMII, "Intel(R) PRO/1000 PCI-Express Network Driver"), + PVID(0x8086, E1000_DEV_ID_I211_COPPER, "Intel(R) PRO/1000 PCI-Express Network Driver"), + PVID(0x8086, E1000_DEV_ID_I354_BACKPLANE_1GBPS, "Intel(R) PRO/1000 PCI-Express Network Driver"), + PVID(0x8086, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS, "Intel(R) PRO/1000 PCI-Express Network Driver"), + PVID(0x8086, E1000_DEV_ID_I354_SGMII, "Intel(R) PRO/1000 PCI-Express Network Driver"), + /* required last entry */ + PVID_END }; /********************************************************************* * Function prototypes *********************************************************************/ -static int em_probe(device_t); -static int em_attach(device_t); -static int em_detach(device_t); -static int em_shutdown(device_t); -static int em_suspend(device_t); -static int em_resume(device_t); -#ifdef EM_MULTIQUEUE -static int em_mq_start(if_t, struct mbuf *); -static int em_mq_start_locked(if_t, - struct tx_ring *); -static void em_qflush(if_t); -#else -static void em_start(if_t); -static void em_start_locked(if_t, struct tx_ring *); -#endif -static int em_ioctl(if_t, u_long, caddr_t); -static uint64_t em_get_counter(if_t, ift_counter); -static void em_init(void *); -static void em_init_locked(struct adapter *); -static void em_stop(void *); -static void em_media_status(if_t, struct ifmediareq *); -static int em_media_change(if_t); -static void em_identify_hardware(struct adapter *); -static int em_allocate_pci_resources(struct adapter *); -static int em_allocate_legacy(struct adapter *); -static int 
em_allocate_msix(struct adapter *); -static int em_allocate_queues(struct adapter *); -static int em_setup_msix(struct adapter *); -static void em_free_pci_resources(struct adapter *); -static void em_local_timer(void *); -static void em_reset(struct adapter *); -static int em_setup_interface(device_t, struct adapter *); -static void em_flush_desc_rings(struct adapter *); - -static void em_setup_transmit_structures(struct adapter *); -static void em_initialize_transmit_unit(struct adapter *); -static int em_allocate_transmit_buffers(struct tx_ring *); -static void em_free_transmit_structures(struct adapter *); -static void em_free_transmit_buffers(struct tx_ring *); - -static int em_setup_receive_structures(struct adapter *); -static int em_allocate_receive_buffers(struct rx_ring *); -static void em_initialize_receive_unit(struct adapter *); -static void em_free_receive_structures(struct adapter *); -static void em_free_receive_buffers(struct rx_ring *); - -static void em_enable_intr(struct adapter *); -static void em_disable_intr(struct adapter *); +static void *em_register(device_t dev); +static void *igb_register(device_t dev); +static int em_if_attach_pre(if_ctx_t ctx); +static int em_if_attach_post(if_ctx_t ctx); +static int em_if_detach(if_ctx_t ctx); +static int em_if_shutdown(if_ctx_t ctx); +static int em_if_suspend(if_ctx_t ctx); +static int em_if_resume(if_ctx_t ctx); + +static int em_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets); +static int em_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nrxqs, int nrxqsets); +static void em_if_queues_free(if_ctx_t ctx); + +static uint64_t em_if_get_counter(if_ctx_t, ift_counter); +static void em_if_init(if_ctx_t ctx); +static void em_if_stop(if_ctx_t ctx); +static void em_if_media_status(if_ctx_t, struct ifmediareq *); +static int em_if_media_change(if_ctx_t ctx); +static int em_if_mtu_set(if_ctx_t ctx, uint32_t mtu); +static void 
em_if_timer(if_ctx_t ctx, uint16_t qid); +static void em_if_vlan_register(if_ctx_t ctx, u16 vtag); +static void em_if_vlan_unregister(if_ctx_t ctx, u16 vtag); + +static void em_identify_hardware(if_ctx_t ctx); +static int em_allocate_pci_resources(if_ctx_t ctx); +static void em_free_pci_resources(if_ctx_t ctx); +static void em_reset(if_ctx_t ctx); +static int em_setup_interface(if_ctx_t ctx); +static int em_setup_msix(if_ctx_t ctx); + +static void em_initialize_transmit_unit(if_ctx_t ctx); +static void em_initialize_receive_unit(if_ctx_t ctx); + +static void em_if_enable_intr(if_ctx_t ctx); +static void em_if_disable_intr(if_ctx_t ctx); +static int em_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid); +static int em_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid); +static void em_if_multi_set(if_ctx_t ctx); +static void em_if_update_admin_status(if_ctx_t ctx); +static void em_if_debug(if_ctx_t ctx); static void em_update_stats_counters(struct adapter *); static void em_add_hw_stats(struct adapter *adapter); -static void em_txeof(struct tx_ring *); -static bool em_rxeof(struct rx_ring *, int, int *); -#ifndef __NO_STRICT_ALIGNMENT -static int em_fixup_rx(struct rx_ring *); -#endif -static void em_setup_rxdesc(union e1000_rx_desc_extended *, - const struct em_rxbuffer *rxbuf); -static void em_receive_checksum(uint32_t status, struct mbuf *); -static void em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int, - struct ip *, u32 *, u32 *); -static void em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *, - struct tcphdr *, u32 *, u32 *); -static void em_set_promisc(struct adapter *); -static void em_disable_promisc(struct adapter *); -static void em_set_multi(struct adapter *); -static void em_update_link_status(struct adapter *); -static void em_refresh_mbufs(struct rx_ring *, int); -static void em_register_vlan(void *, if_t, u16); -static void em_unregister_vlan(void *, if_t, u16); +static int em_if_set_promisc(if_ctx_t ctx, int flags); 
static void em_setup_vlan_hw_support(struct adapter *); -static int em_xmit(struct tx_ring *, struct mbuf **); -static int em_dma_malloc(struct adapter *, bus_size_t, - struct em_dma_alloc *, int); -static void em_dma_free(struct adapter *, struct em_dma_alloc *); static int em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS); static void em_print_nvm_info(struct adapter *); static int em_sysctl_debug_info(SYSCTL_HANDLER_ARGS); +static int em_get_rs(SYSCTL_HANDLER_ARGS); static void em_print_debug_info(struct adapter *); static int em_is_valid_ether_addr(u8 *); static int em_sysctl_int_delay(SYSCTL_HANDLER_ARGS); @@ -301,65 +275,123 @@ static void em_add_int_delay_sysctl(struct adapter *, const char *, /* Management and WOL Support */ static void em_init_manageability(struct adapter *); static void em_release_manageability(struct adapter *); -static void em_get_hw_control(struct adapter *); -static void em_release_hw_control(struct adapter *); -static void em_get_wakeup(device_t); -static void em_enable_wakeup(device_t); +static void em_get_hw_control(struct adapter *); +static void em_release_hw_control(struct adapter *); +static void em_get_wakeup(if_ctx_t ctx); +static void em_enable_wakeup(if_ctx_t ctx); static int em_enable_phy_wakeup(struct adapter *); -static void em_led_func(void *, int); static void em_disable_aspm(struct adapter *); -static int em_irq_fast(void *); +int em_intr(void *arg); +static void em_disable_promisc(if_ctx_t ctx); /* MSIX handlers */ -static void em_msix_tx(void *); -static void em_msix_rx(void *); -static void em_msix_link(void *); -static void em_handle_tx(void *context, int pending); -static void em_handle_rx(void *context, int pending); -static void em_handle_link(void *context, int pending); - -#ifdef EM_MULTIQUEUE -static void em_enable_vectors_82574(struct adapter *); -#endif +static int em_if_msix_intr_assign(if_ctx_t, int); +static int em_msix_link(void *); +static void em_handle_link(void *context); + +static void 
em_enable_vectors_82574(if_ctx_t); -static void em_set_sysctl_value(struct adapter *, const char *, - const char *, int *, int); static int em_set_flowcntl(SYSCTL_HANDLER_ARGS); static int em_sysctl_eee(SYSCTL_HANDLER_ARGS); +static void em_if_led_func(if_ctx_t ctx, int onoff); + +static int em_get_regs(SYSCTL_HANDLER_ARGS); -static __inline void em_rx_discard(struct rx_ring *, int); +static void lem_smartspeed(struct adapter *adapter); +static void igb_configure_queues(struct adapter *adapter); -#ifdef DEVICE_POLLING -static poll_handler_t em_poll; -#endif /* POLLING */ /********************************************************************* * FreeBSD Device Interface Entry Points *********************************************************************/ - static device_method_t em_methods[] = { /* Device interface */ - DEVMETHOD(device_probe, em_probe), - DEVMETHOD(device_attach, em_attach), - DEVMETHOD(device_detach, em_detach), - DEVMETHOD(device_shutdown, em_shutdown), - DEVMETHOD(device_suspend, em_suspend), - DEVMETHOD(device_resume, em_resume), + DEVMETHOD(device_register, em_register), + DEVMETHOD(device_probe, iflib_device_probe), + DEVMETHOD(device_attach, iflib_device_attach), + DEVMETHOD(device_detach, iflib_device_detach), + DEVMETHOD(device_shutdown, iflib_device_shutdown), + DEVMETHOD(device_suspend, iflib_device_suspend), + DEVMETHOD(device_resume, iflib_device_resume), + DEVMETHOD_END +}; + +static device_method_t igb_methods[] = { + /* Device interface */ + DEVMETHOD(device_register, igb_register), + DEVMETHOD(device_probe, iflib_device_probe), + DEVMETHOD(device_attach, iflib_device_attach), + DEVMETHOD(device_detach, iflib_device_detach), + DEVMETHOD(device_shutdown, iflib_device_shutdown), + DEVMETHOD(device_suspend, iflib_device_suspend), + DEVMETHOD(device_resume, iflib_device_resume), DEVMETHOD_END }; + static driver_t em_driver = { "em", em_methods, sizeof(struct adapter), }; -devclass_t em_devclass; +static devclass_t em_devclass; 
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0); + MODULE_DEPEND(em, pci, 1, 1, 1); MODULE_DEPEND(em, ether, 1, 1, 1); -#ifdef DEV_NETMAP -MODULE_DEPEND(em, netmap, 1, 1, 1); -#endif /* DEV_NETMAP */ +MODULE_DEPEND(em, iflib, 1, 1, 1); + +static driver_t igb_driver = { + "igb", igb_methods, sizeof(struct adapter), +}; + +static devclass_t igb_devclass; +DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0); + +MODULE_DEPEND(igb, pci, 1, 1, 1); +MODULE_DEPEND(igb, ether, 1, 1, 1); +MODULE_DEPEND(igb, iflib, 1, 1, 1); + + +static device_method_t em_if_methods[] = { + DEVMETHOD(ifdi_attach_pre, em_if_attach_pre), + DEVMETHOD(ifdi_attach_post, em_if_attach_post), + DEVMETHOD(ifdi_detach, em_if_detach), + DEVMETHOD(ifdi_shutdown, em_if_shutdown), + DEVMETHOD(ifdi_suspend, em_if_suspend), + DEVMETHOD(ifdi_resume, em_if_resume), + DEVMETHOD(ifdi_init, em_if_init), + DEVMETHOD(ifdi_stop, em_if_stop), + DEVMETHOD(ifdi_msix_intr_assign, em_if_msix_intr_assign), + DEVMETHOD(ifdi_intr_enable, em_if_enable_intr), + DEVMETHOD(ifdi_intr_disable, em_if_disable_intr), + DEVMETHOD(ifdi_tx_queues_alloc, em_if_tx_queues_alloc), + DEVMETHOD(ifdi_rx_queues_alloc, em_if_rx_queues_alloc), + DEVMETHOD(ifdi_queues_free, em_if_queues_free), + DEVMETHOD(ifdi_update_admin_status, em_if_update_admin_status), + DEVMETHOD(ifdi_multi_set, em_if_multi_set), + DEVMETHOD(ifdi_media_status, em_if_media_status), + DEVMETHOD(ifdi_media_change, em_if_media_change), + DEVMETHOD(ifdi_mtu_set, em_if_mtu_set), + DEVMETHOD(ifdi_promisc_set, em_if_set_promisc), + DEVMETHOD(ifdi_timer, em_if_timer), + DEVMETHOD(ifdi_vlan_register, em_if_vlan_register), + DEVMETHOD(ifdi_vlan_unregister, em_if_vlan_unregister), + DEVMETHOD(ifdi_get_counter, em_if_get_counter), + DEVMETHOD(ifdi_led_func, em_if_led_func), + DEVMETHOD(ifdi_rx_queue_intr_enable, em_if_rx_queue_intr_enable), + DEVMETHOD(ifdi_tx_queue_intr_enable, em_if_tx_queue_intr_enable), + DEVMETHOD(ifdi_debug, em_if_debug), + DEVMETHOD_END +}; + +/* + * 
note that if (adapter->msix_mem) is replaced by: + * if (adapter->intr_type == IFLIB_INTR_MSIX) + */ +static driver_t em_if_driver = { + "em_if", em_if_methods, sizeof(struct adapter) +}; /********************************************************************* * Tunable default values. @@ -367,10 +399,16 @@ MODULE_DEPEND(em, netmap, 1, 1, 1); #define EM_TICKS_TO_USECS(ticks) ((1024 * (ticks) + 500) / 1000) #define EM_USECS_TO_TICKS(usecs) ((1000 * (usecs) + 512) / 1024) +#define M_TSO_LEN 66 #define MAX_INTS_PER_SEC 8000 #define DEFAULT_ITR (1000000000/(MAX_INTS_PER_SEC * 256)) +/* Allow common code without TSO */ +#ifndef CSUM_TSO +#define CSUM_TSO 0 +#endif + #define TSO_WORKAROUND 4 static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters"); @@ -395,39 +433,15 @@ SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN, &em_rx_abs_int_delay_dflt, 0, "Default receive interrupt delay limit in usecs"); -static int em_rxd = EM_DEFAULT_RXD; -static int em_txd = EM_DEFAULT_TXD; -SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0, - "Number of receive descriptors per queue"); -SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0, - "Number of transmit descriptors per queue"); - static int em_smart_pwr_down = FALSE; SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down, 0, "Set to true to leave smart power down enabled on newer adapters"); /* Controls whether promiscuous also shows bad packets */ -static int em_debug_sbp = FALSE; +static int em_debug_sbp = TRUE; SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0, "Show bad packets in promiscuous mode"); -static int em_enable_msix = TRUE; -SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0, - "Enable MSI-X interrupts"); - -#ifdef EM_MULTIQUEUE -static int em_num_queues = 1; -SYSCTL_INT(_hw_em, OID_AUTO, num_queues, CTLFLAG_RDTUN, &em_num_queues, 0, - "82574 only: Number of queues to configure, 0 indicates autoconfigure"); 
-#endif - -/* -** Global variable to store last used CPU when binding queues -** to CPUs in igb_allocate_msix. Starts at CPU_FIRST and increments when a -** queue is bound to a cpu. -*/ -static int em_last_bind_cpu = -1; - /* How many packets rxeof tries to clean at a time */ static int em_rx_process_limit = 100; SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN, @@ -440,64 +454,243 @@ static int eee_setting = 1; SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0, "Enable Energy Efficient Ethernet"); +/* +** Tuneable Interrupt rate +*/ +static int em_max_interrupt_rate = 8000; +SYSCTL_INT(_hw_em, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN, + &em_max_interrupt_rate, 0, "Maximum interrupts per second"); + + + /* Global used in WOL setup with multiport cards */ static int global_quad_port_a = 0; -#ifdef DEV_NETMAP /* see ixgbe.c for details */ -#include -#endif /* DEV_NETMAP */ +extern struct if_txrx igb_txrx; +extern struct if_txrx em_txrx; +extern struct if_txrx lem_txrx; + +static struct if_shared_ctx em_sctx_init = { + .isc_magic = IFLIB_MAGIC, + .isc_q_align = PAGE_SIZE, + .isc_tx_maxsize = EM_TSO_SIZE, + .isc_tx_maxsegsize = PAGE_SIZE, + .isc_rx_maxsize = MJUM9BYTES, + .isc_rx_nsegments = 1, + .isc_rx_maxsegsize = MJUM9BYTES, + .isc_nfl = 1, + .isc_nrxqs = 1, + .isc_ntxqs = 1, + .isc_admin_intrcnt = 1, + .isc_vendor_info = em_vendor_info_array, + .isc_driver_version = em_driver_version, + .isc_driver = &em_if_driver, + .isc_flags = IFLIB_NEED_SCRATCH | IFLIB_TSO_INIT_IP, + + .isc_nrxd_min = {EM_MIN_RXD}, + .isc_ntxd_min = {EM_MIN_TXD}, + .isc_nrxd_max = {EM_MAX_RXD}, + .isc_ntxd_max = {EM_MAX_TXD}, + .isc_nrxd_default = {EM_DEFAULT_RXD}, + .isc_ntxd_default = {EM_DEFAULT_TXD}, +}; -/********************************************************************* - * Device identification routine +if_shared_ctx_t em_sctx = &em_sctx_init; + + +static struct if_shared_ctx igb_sctx_init = { + .isc_magic = IFLIB_MAGIC, + .isc_q_align = 
PAGE_SIZE, + .isc_tx_maxsize = EM_TSO_SIZE, + .isc_tx_maxsegsize = PAGE_SIZE, + .isc_rx_maxsize = MJUM9BYTES, + .isc_rx_nsegments = 1, + .isc_rx_maxsegsize = MJUM9BYTES, + .isc_nfl = 1, + .isc_nrxqs = 1, + .isc_ntxqs = 1, + .isc_admin_intrcnt = 1, + .isc_vendor_info = igb_vendor_info_array, + .isc_driver_version = em_driver_version, + .isc_driver = &em_if_driver, + .isc_flags = IFLIB_NEED_SCRATCH | IFLIB_TSO_INIT_IP, + + .isc_nrxd_min = {EM_MIN_RXD}, + .isc_ntxd_min = {EM_MIN_TXD}, + .isc_nrxd_max = {EM_MAX_RXD}, + .isc_ntxd_max = {EM_MAX_TXD}, + .isc_nrxd_default = {EM_DEFAULT_RXD}, + .isc_ntxd_default = {EM_DEFAULT_TXD}, +}; + +if_shared_ctx_t igb_sctx = &igb_sctx_init; + +/***************************************************************** * - * em_probe determines if the driver should be loaded on - * adapter based on PCI vendor/device id of the adapter. + * Dump Registers * - * return BUS_PROBE_DEFAULT on success, positive on failure - *********************************************************************/ + ****************************************************************/ +#define IGB_REGS_LEN 739 -static int -em_probe(device_t dev) +static int em_get_regs(SYSCTL_HANDLER_ARGS) { - char adapter_name[60]; - uint16_t pci_vendor_id = 0; - uint16_t pci_device_id = 0; - uint16_t pci_subvendor_id = 0; - uint16_t pci_subdevice_id = 0; - em_vendor_info_t *ent; + struct adapter *adapter = (struct adapter *)arg1; + struct e1000_hw *hw = &adapter->hw; - INIT_DEBUGOUT("em_probe: begin"); + struct sbuf *sb; + u32 *regs_buff = (u32 *)malloc(sizeof(u32) * IGB_REGS_LEN, M_DEVBUF, M_NOWAIT); + int rc; - pci_vendor_id = pci_get_vendor(dev); - if (pci_vendor_id != EM_VENDOR_ID) - return (ENXIO); + memset(regs_buff, 0, IGB_REGS_LEN * sizeof(u32)); - pci_device_id = pci_get_device(dev); - pci_subvendor_id = pci_get_subvendor(dev); - pci_subdevice_id = pci_get_subdevice(dev); - - ent = em_vendor_info_array; - while (ent->vendor_id != 0) { - if ((pci_vendor_id == ent->vendor_id) && - 
(pci_device_id == ent->device_id) && - - ((pci_subvendor_id == ent->subvendor_id) || - (ent->subvendor_id == PCI_ANY_ID)) && - - ((pci_subdevice_id == ent->subdevice_id) || - (ent->subdevice_id == PCI_ANY_ID))) { - sprintf(adapter_name, "%s %s", - em_strings[ent->index], - em_driver_version); - device_set_desc_copy(dev, adapter_name); - return (BUS_PROBE_DEFAULT); - } - ent++; + rc = sysctl_wire_old_buffer(req, 0); + MPASS(rc == 0); + if (rc != 0) + return (rc); + + sb = sbuf_new_for_sysctl(NULL, NULL, 32*400, req); + MPASS(sb != NULL); + if (sb == NULL) + return (ENOMEM); + + /* General Registers */ + regs_buff[0] = E1000_READ_REG(hw, E1000_CTRL); + regs_buff[1] = E1000_READ_REG(hw, E1000_STATUS); + regs_buff[2] = E1000_READ_REG(hw, E1000_CTRL_EXT); + regs_buff[3] = E1000_READ_REG(hw, E1000_ICR); + regs_buff[4] = E1000_READ_REG(hw, E1000_RCTL); + regs_buff[5] = E1000_READ_REG(hw, E1000_RDLEN(0)); + regs_buff[6] = E1000_READ_REG(hw, E1000_RDH(0)); + regs_buff[7] = E1000_READ_REG(hw, E1000_RDT(0)); + regs_buff[8] = E1000_READ_REG(hw, E1000_RXDCTL(0)); + regs_buff[9] = E1000_READ_REG(hw, E1000_RDBAL(0)); + regs_buff[10] = E1000_READ_REG(hw, E1000_RDBAH(0)); + regs_buff[11] = E1000_READ_REG(hw, E1000_TCTL); + regs_buff[12] = E1000_READ_REG(hw, E1000_TDBAL(0)); + regs_buff[13] = E1000_READ_REG(hw, E1000_TDBAH(0)); + regs_buff[14] = E1000_READ_REG(hw, E1000_TDLEN(0)); + regs_buff[15] = E1000_READ_REG(hw, E1000_TDH(0)); + regs_buff[16] = E1000_READ_REG(hw, E1000_TDT(0)); + regs_buff[17] = E1000_READ_REG(hw, E1000_TXDCTL(0)); + regs_buff[18] = E1000_READ_REG(hw, E1000_TDFH); + regs_buff[19] = E1000_READ_REG(hw, E1000_TDFT); + regs_buff[20] = E1000_READ_REG(hw, E1000_TDFHS); + regs_buff[21] = E1000_READ_REG(hw, E1000_TDFPC); + + sbuf_printf(sb, "General Registers\n"); + sbuf_printf(sb, "\tCTRL\t %08x\n", regs_buff[0]); + sbuf_printf(sb, "\tSTATUS\t %08x\n", regs_buff[1]); + sbuf_printf(sb, "\tCTRL_EXIT\t %08x\n\n", regs_buff[2]); + + sbuf_printf(sb, "Interrupt 
Registers\n"); + sbuf_printf(sb, "\tICR\t %08x\n\n", regs_buff[3]); + + sbuf_printf(sb, "RX Registers\n"); + sbuf_printf(sb, "\tRCTL\t %08x\n", regs_buff[4]); + sbuf_printf(sb, "\tRDLEN\t %08x\n", regs_buff[5]); + sbuf_printf(sb, "\tRDH\t %08x\n", regs_buff[6]); + sbuf_printf(sb, "\tRDT\t %08x\n", regs_buff[7]); + sbuf_printf(sb, "\tRXDCTL\t %08x\n", regs_buff[8]); + sbuf_printf(sb, "\tRDBAL\t %08x\n", regs_buff[9]); + sbuf_printf(sb, "\tRDBAH\t %08x\n\n", regs_buff[10]); + + sbuf_printf(sb, "TX Registers\n"); + sbuf_printf(sb, "\tTCTL\t %08x\n", regs_buff[11]); + sbuf_printf(sb, "\tTDBAL\t %08x\n", regs_buff[12]); + sbuf_printf(sb, "\tTDBAH\t %08x\n", regs_buff[13]); + sbuf_printf(sb, "\tTDLEN\t %08x\n", regs_buff[14]); + sbuf_printf(sb, "\tTDH\t %08x\n", regs_buff[15]); + sbuf_printf(sb, "\tTDT\t %08x\n", regs_buff[16]); + sbuf_printf(sb, "\tTXDCTL\t %08x\n", regs_buff[17]); + sbuf_printf(sb, "\tTDFH\t %08x\n", regs_buff[18]); + sbuf_printf(sb, "\tTDFT\t %08x\n", regs_buff[19]); + sbuf_printf(sb, "\tTDFHS\t %08x\n", regs_buff[20]); + sbuf_printf(sb, "\tTDFPC\t %08x\n\n", regs_buff[21]); + +#ifdef DUMP_DESCS + { + if_softc_ctx_t scctx = adapter->shared; + struct rx_ring *rxr = &rx_que->rxr; + struct tx_ring *txr = &tx_que->txr; + int ntxd = scctx->isc_ntxd[0]; + int nrxd = scctx->isc_nrxd[0]; + int j; + + for (j = 0; j < nrxd; j++) { + u32 staterr = le32toh(rxr->rx_base[j].wb.upper.status_error); + u32 length = le32toh(rxr->rx_base[j].wb.upper.length); + sbuf_printf(sb, "\tReceive Descriptor Address %d: %08" PRIx64 " Error:%d Length:%d\n", j, rxr->rx_base[j].read.buffer_addr, staterr, length); + } + + for (j = 0; j < min(ntxd, 256); j++) { + unsigned int *ptr = (unsigned int *)&txr->tx_base[j]; + + sbuf_printf(sb, "\tTXD[%03d] [0]: %08x [1]: %08x [2]: %08x [3]: %08x eop: %d DD=%d\n", + j, ptr[0], ptr[1], ptr[2], ptr[3], buf->eop, + buf->eop != -1 ? 
txr->tx_base[buf->eop].upper.fields.status & E1000_TXD_STAT_DD : 0); + + } + } +#endif + + rc = sbuf_finish(sb); + sbuf_delete(sb); + return(rc); +} + +static void * +em_register(device_t dev) +{ + return (em_sctx); +} + +static void * +igb_register(device_t dev) +{ + return (igb_sctx); +} + +static int +em_set_num_queues(if_ctx_t ctx) +{ + struct adapter *adapter = iflib_get_softc(ctx); + int maxqueues; + + /* Sanity check based on HW */ + switch (adapter->hw.mac.type) { + case e1000_82576: + case e1000_82580: + case e1000_i350: + case e1000_i354: + maxqueues = 8; + break; + case e1000_i210: + case e1000_82575: + maxqueues = 4; + break; + case e1000_i211: + case e1000_82574: + maxqueues = 2; + break; + default: + maxqueues = 1; + break; } - return (ENXIO); + return (maxqueues); } + +#define EM_CAPS \ + IFCAP_TSO4 | IFCAP_TXCSUM | IFCAP_LRO | IFCAP_RXCSUM | IFCAP_VLAN_HWFILTER | IFCAP_WOL_MAGIC | \ + IFCAP_WOL_MCAST | IFCAP_WOL | IFCAP_VLAN_HWTSO | IFCAP_HWCSUM | IFCAP_VLAN_HWTAGGING | \ + IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWTSO | IFCAP_VLAN_MTU; + +#define IGB_CAPS \ + IFCAP_TSO4 | IFCAP_TXCSUM | IFCAP_LRO | IFCAP_RXCSUM | IFCAP_VLAN_HWFILTER | IFCAP_WOL_MAGIC | \ + IFCAP_WOL_MCAST | IFCAP_WOL | IFCAP_VLAN_HWTSO | IFCAP_HWCSUM | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM | \ + IFCAP_VLAN_HWTSO | IFCAP_VLAN_MTU | IFCAP_TXCSUM_IPV6 | IFCAP_HWCSUM_IPV6 | IFCAP_JUMBO_MTU; + /********************************************************************* * Device initialization routine * @@ -509,23 +702,30 @@ em_probe(device_t dev) *********************************************************************/ static int -em_attach(device_t dev) +em_if_attach_pre(if_ctx_t ctx) { - struct adapter *adapter; - struct e1000_hw *hw; - int error = 0; + struct adapter *adapter; + if_softc_ctx_t scctx; + device_t dev; + struct e1000_hw *hw; + int error = 0; - INIT_DEBUGOUT("em_attach: begin"); + INIT_DEBUGOUT("em_if_attach_pre begin"); + dev = iflib_get_dev(ctx); + adapter = iflib_get_softc(ctx); if 
(resource_disabled("em", device_get_unit(dev))) { device_printf(dev, "Disabled by device hint\n"); return (ENXIO); } - adapter = device_get_softc(dev); + adapter->ctx = ctx; adapter->dev = adapter->osdep.dev = dev; + scctx = adapter->shared = iflib_get_softc_ctx(ctx); + adapter->media = iflib_get_media(ctx); hw = &adapter->hw; - EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev)); + + adapter->tx_process_limit = scctx->isc_ntxd[0]; /* SYSCTL stuff */ SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), @@ -543,13 +743,78 @@ em_attach(device_t dev) OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, em_set_flowcntl, "I", "Flow Control"); - callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0); + SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), + SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), + OID_AUTO, "reg_dump", CTLTYPE_STRING | CTLFLAG_RD, adapter, 0, + em_get_regs, "A", "Dump Registers"); + + SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), + SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), + OID_AUTO, "rs_dump", CTLTYPE_INT | CTLFLAG_RW, adapter, 0, + em_get_rs, "I", "Dump RS indexes"); /* Determine hardware and mac info */ - em_identify_hardware(adapter); + em_identify_hardware(ctx); + + /* Set isc_msix_bar */ + scctx->isc_msix_bar = PCIR_BAR(EM_MSIX_BAR); + scctx->isc_tx_nsegments = EM_MAX_SCATTER; + scctx->isc_tx_tso_segments_max = scctx->isc_tx_nsegments; + scctx->isc_tx_tso_size_max = EM_TSO_SIZE; + scctx->isc_tx_tso_segsize_max = EM_TSO_SEG_SIZE; + scctx->isc_nrxqsets_max = scctx->isc_ntxqsets_max = em_set_num_queues(ctx); + device_printf(dev, "attach_pre capping queues at %d\n", scctx->isc_ntxqsets_max); + + scctx->isc_tx_csum_flags = CSUM_TCP | CSUM_UDP | CSUM_IP_TSO; + + + if (adapter->hw.mac.type >= igb_mac_min) { + int try_second_bar; + + scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0] * sizeof(union e1000_adv_tx_desc), EM_DBA_ALIGN); + scctx->isc_rxqsizes[0] = roundup2(scctx->isc_nrxd[0] * sizeof(union e1000_adv_rx_desc), EM_DBA_ALIGN); + scctx->isc_txd_size[0] 
= sizeof(union e1000_adv_tx_desc); + scctx->isc_rxd_size[0] = sizeof(union e1000_adv_rx_desc); + scctx->isc_txrx = &igb_txrx; + scctx->isc_capenable = IGB_CAPS; + scctx->isc_tx_csum_flags = CSUM_TCP | CSUM_UDP | CSUM_TSO | CSUM_IP6_TCP \ + | CSUM_IP6_UDP | CSUM_IP6_TCP; + if (adapter->hw.mac.type != e1000_82575) + scctx->isc_tx_csum_flags |= CSUM_SCTP | CSUM_IP6_SCTP; + + /* + ** Some new devices, as with ixgbe, now may + ** use a different BAR, so we need to keep + ** track of which is used. + */ + try_second_bar = pci_read_config(dev, scctx->isc_msix_bar, 4); + if (try_second_bar == 0) + scctx->isc_msix_bar += 4; + + } else if (adapter->hw.mac.type >= em_mac_min) { + scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0]* sizeof(struct e1000_tx_desc), EM_DBA_ALIGN); + scctx->isc_rxqsizes[0] = roundup2(scctx->isc_nrxd[0] * sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN); + scctx->isc_txd_size[0] = sizeof(struct e1000_tx_desc); + scctx->isc_rxd_size[0] = sizeof(union e1000_rx_desc_extended); + scctx->isc_txrx = &em_txrx; + scctx->isc_capenable = EM_CAPS; + scctx->isc_tx_csum_flags = CSUM_TCP | CSUM_UDP | CSUM_IP_TSO; + } else { + scctx->isc_txqsizes[0] = roundup2((scctx->isc_ntxd[0] + 1) * sizeof(struct e1000_tx_desc), EM_DBA_ALIGN); + scctx->isc_rxqsizes[0] = roundup2((scctx->isc_nrxd[0] + 1) * sizeof(struct e1000_rx_desc), EM_DBA_ALIGN); + scctx->isc_txd_size[0] = sizeof(struct e1000_tx_desc); + scctx->isc_rxd_size[0] = sizeof(struct e1000_rx_desc); + scctx->isc_tx_csum_flags = CSUM_TCP | CSUM_UDP | CSUM_IP_TSO; + scctx->isc_txrx = &lem_txrx; + scctx->isc_capenable = EM_CAPS; + if (adapter->hw.mac.type < e1000_82543) + scctx->isc_capenable &= ~(IFCAP_HWCSUM|IFCAP_VLAN_HWCSUM); + scctx->isc_tx_csum_flags = CSUM_TCP | CSUM_UDP | CSUM_IP_TSO; + scctx->isc_msix_bar = 0; + } /* Setup PCI resources */ - if (em_allocate_pci_resources(adapter)) { + if (em_allocate_pci_resources(ctx)) { device_printf(dev, "Allocation of PCI resources failed\n"); error = ENXIO; goto 
err_pci; @@ -558,7 +823,7 @@ em_attach(device_t dev) /* ** For ICH8 and family we need to ** map the flash memory, and this - ** must happen after the MAC is + ** must happen after the MAC is ** identified */ if ((hw->mac.type == e1000_ich8lan) || @@ -605,11 +870,7 @@ em_attach(device_t dev) goto err_pci; } - /* - * Setup MSI/X or MSI if PCI Express - */ - adapter->msix = em_setup_msix(adapter); - + em_setup_msix(ctx); e1000_get_bus_info(hw); /* Set up some sysctls for the tunable interrupt delays */ @@ -635,36 +896,14 @@ em_attach(device_t dev) E1000_REGISTER(hw, E1000_ITR), DEFAULT_ITR); - /* Sysctl for limiting the amount of work done in the taskqueue */ - em_set_sysctl_value(adapter, "rx_processing_limit", - "max number of rx packets to process", &adapter->rx_process_limit, - em_rx_process_limit); - - /* - * Validate number of transmit and receive descriptors. It - * must not exceed hardware maximum, and must be multiple - * of E1000_DBA_ALIGN. - */ - if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 || - (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) { - device_printf(dev, "Using %d TX descriptors instead of %d!\n", - EM_DEFAULT_TXD, em_txd); - adapter->num_tx_desc = EM_DEFAULT_TXD; - } else - adapter->num_tx_desc = em_txd; - - if (((em_rxd * sizeof(union e1000_rx_desc_extended)) % EM_DBA_ALIGN) != 0 || - (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) { - device_printf(dev, "Using %d RX descriptors instead of %d!\n", - EM_DEFAULT_RXD, em_rxd); - adapter->num_rx_desc = EM_DEFAULT_RXD; - } else - adapter->num_rx_desc = em_rxd; - hw->mac.autoneg = DO_AUTO_NEG; hw->phy.autoneg_wait_to_complete = FALSE; hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT; + if (adapter->hw.mac.type < em_mac_min) { + e1000_init_script_state_82541(&adapter->hw, TRUE); + e1000_set_tbi_compatibility_82543(&adapter->hw, TRUE); + } /* Copper options */ if (hw->phy.media_type == e1000_media_type_copper) { hw->phy.mdix = AUTO_ALL_MODES; @@ -676,7 +915,7 @@ em_attach(device_t 
dev) * Set the frame limits assuming * standard ethernet sized frames. */ - adapter->hw.mac.max_frame_size = + scctx->isc_max_frame_size = adapter->hw.mac.max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE; /* @@ -685,14 +924,6 @@ em_attach(device_t dev) */ hw->mac.report_tx_early = 1; - /* - ** Get queue/ring memory - */ - if (em_allocate_queues(adapter)) { - error = ENOMEM; - goto err_pci; - } - /* Allocate multicast array memory. */ adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT); @@ -705,7 +936,7 @@ em_attach(device_t dev) /* Check SOL/IDER usage */ if (e1000_check_reset_block(hw)) device_printf(dev, "PHY reset is blocked" - " due to SOL/IDER session.\n"); + " due to SOL/IDER session.\n"); /* Sysctl for setting Energy Efficient Ethernet */ hw->dev_spec.ich8lan.eee_disable = eee_setting; @@ -722,7 +953,6 @@ em_attach(device_t dev) */ e1000_reset_hw(hw); - /* Make sure we have a good EEPROM before we read from it */ if (e1000_validate_nvm_checksum(hw) < 0) { /* @@ -741,7 +971,7 @@ em_attach(device_t dev) /* Copy the permanent MAC address out of the EEPROM */ if (e1000_read_mac_addr(hw) < 0) { device_printf(dev, "EEPROM read error while reading MAC" - " address\n"); + " address\n"); error = EIO; goto err_late; } @@ -755,68 +985,58 @@ em_attach(device_t dev) /* Disable ULP support */ e1000_disable_ulp_lpt_lp(hw, TRUE); - /* - ** Do interrupt configuration - */ - if (adapter->msix > 1) /* Do MSIX */ - error = em_allocate_msix(adapter); - else /* MSI or Legacy */ - error = em_allocate_legacy(adapter); - if (error) - goto err_late; - /* * Get Wake-on-Lan and Management info for later use */ - em_get_wakeup(dev); + em_get_wakeup(ctx); + + iflib_set_mac(ctx, hw->mac.addr); + + return (0); + +err_late: + em_release_hw_control(adapter); +err_pci: + em_free_pci_resources(ctx); + free(adapter->mta, M_DEVBUF); + + return (error); +} +static int +em_if_attach_post(if_ctx_t ctx) +{ + struct adapter *adapter = 
iflib_get_softc(ctx); + struct e1000_hw *hw = &adapter->hw; + int error = 0; + /* Setup OS specific network interface */ - if (em_setup_interface(dev, adapter) != 0) + error = em_setup_interface(ctx); + if (error != 0) { goto err_late; + } - em_reset(adapter); + em_reset(ctx); /* Initialize statistics */ em_update_stats_counters(adapter); - hw->mac.get_link_status = 1; - em_update_link_status(adapter); - - /* Register for VLAN events */ - adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, - em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST); - adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, - em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST); - + em_if_update_admin_status(ctx); em_add_hw_stats(adapter); /* Non-AMT based hardware can now take control from firmware */ if (adapter->has_manage && !adapter->has_amt) em_get_hw_control(adapter); + + INIT_DEBUGOUT("em_if_attach_post: end"); - /* Tell the stack that the interface is not active */ - if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); - - adapter->led_dev = led_create(em_led_func, adapter, - device_get_nameunit(dev)); -#ifdef DEV_NETMAP - em_netmap_attach(adapter); -#endif /* DEV_NETMAP */ - - INIT_DEBUGOUT("em_attach: end"); - - return (0); + return (error); err_late: - em_free_transmit_structures(adapter); - em_free_receive_structures(adapter); em_release_hw_control(adapter); - if (adapter->ifp != (void *)NULL) - if_free(adapter->ifp); -err_pci: - em_free_pci_resources(adapter); + em_free_pci_resources(ctx); + em_if_queues_free(ctx); free(adapter->mta, M_DEVBUF); - EM_CORE_LOCK_DESTROY(adapter); return (error); } @@ -832,60 +1052,17 @@ err_pci: *********************************************************************/ static int -em_detach(device_t dev) +em_if_detach(if_ctx_t ctx) { - struct adapter *adapter = device_get_softc(dev); - if_t ifp = adapter->ifp; + struct adapter *adapter = iflib_get_softc(ctx); INIT_DEBUGOUT("em_detach: begin"); - /* Make sure VLANS are not 
using driver */ - if (if_vlantrunkinuse(ifp)) { - device_printf(dev,"Vlan in use, detach first\n"); - return (EBUSY); - } - -#ifdef DEVICE_POLLING - if (if_getcapenable(ifp) & IFCAP_POLLING) - ether_poll_deregister(ifp); -#endif - - if (adapter->led_dev != NULL) - led_destroy(adapter->led_dev); - - EM_CORE_LOCK(adapter); - adapter->in_detach = 1; - em_stop(adapter); - EM_CORE_UNLOCK(adapter); - EM_CORE_LOCK_DESTROY(adapter); - e1000_phy_hw_reset(&adapter->hw); em_release_manageability(adapter); em_release_hw_control(adapter); - - /* Unregister VLAN events */ - if (adapter->vlan_attach != NULL) - EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach); - if (adapter->vlan_detach != NULL) - EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach); - - ether_ifdetach(adapter->ifp); - callout_drain(&adapter->timer); - -#ifdef DEV_NETMAP - netmap_detach(ifp); -#endif /* DEV_NETMAP */ - - em_free_pci_resources(adapter); - bus_generic_detach(dev); - if_free(ifp); - - em_free_transmit_structures(adapter); - em_free_receive_structures(adapter); - - em_release_hw_control(adapter); - free(adapter->mta, M_DEVBUF); + em_free_pci_resources(ctx); return (0); } @@ -897,534 +1074,166 @@ em_detach(device_t dev) **********************************************************************/ static int -em_shutdown(device_t dev) +em_if_shutdown(if_ctx_t ctx) { - return em_suspend(dev); + return em_if_suspend(ctx); } /* * Suspend/resume device methods. 
*/ static int -em_suspend(device_t dev) +em_if_suspend(if_ctx_t ctx) { - struct adapter *adapter = device_get_softc(dev); - - EM_CORE_LOCK(adapter); + struct adapter *adapter = iflib_get_softc(ctx); - em_release_manageability(adapter); + em_release_manageability(adapter); em_release_hw_control(adapter); - em_enable_wakeup(dev); - - EM_CORE_UNLOCK(adapter); - - return bus_generic_suspend(dev); + em_enable_wakeup(ctx); + return (0); } static int -em_resume(device_t dev) +em_if_resume(if_ctx_t ctx) { - struct adapter *adapter = device_get_softc(dev); - struct tx_ring *txr = adapter->tx_rings; - if_t ifp = adapter->ifp; + struct adapter *adapter = iflib_get_softc(ctx); - EM_CORE_LOCK(adapter); if (adapter->hw.mac.type == e1000_pch2lan) e1000_resume_workarounds_pchlan(&adapter->hw); - em_init_locked(adapter); + em_if_init(ctx); em_init_manageability(adapter); - if ((if_getflags(ifp) & IFF_UP) && - (if_getdrvflags(ifp) & IFF_DRV_RUNNING) && adapter->link_active) { - for (int i = 0; i < adapter->num_queues; i++, txr++) { - EM_TX_LOCK(txr); -#ifdef EM_MULTIQUEUE - if (!drbr_empty(ifp, txr->br)) - em_mq_start_locked(ifp, txr); -#else - if (!if_sendq_empty(ifp)) - em_start_locked(ifp, txr); -#endif - EM_TX_UNLOCK(txr); - } - } - EM_CORE_UNLOCK(adapter); - - return bus_generic_resume(dev); + return(0); } - -#ifndef EM_MULTIQUEUE -static void -em_start_locked(if_t ifp, struct tx_ring *txr) +static int +em_if_mtu_set(if_ctx_t ctx, uint32_t mtu) { - struct adapter *adapter = if_getsoftc(ifp); - struct mbuf *m_head; - - EM_TX_LOCK_ASSERT(txr); - - if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != - IFF_DRV_RUNNING) - return; - - if (!adapter->link_active) - return; - - while (!if_sendq_empty(ifp)) { - /* Call cleanup if number of TX descriptors low */ - if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD) - em_txeof(txr); - if (txr->tx_avail < EM_MAX_SCATTER) { - if_setdrvflagbits(ifp,IFF_DRV_OACTIVE, 0); - break; - } - m_head = if_dequeue(ifp); - if (m_head == NULL) - 
break; - /* - * Encapsulation can modify our pointer, and or make it - * NULL on failure. In that event, we can't requeue. - */ - if (em_xmit(txr, &m_head)) { - if (m_head == NULL) - break; - if_sendq_prepend(ifp, m_head); - break; - } + int max_frame_size; + struct adapter *adapter = iflib_get_softc(ctx); + if_softc_ctx_t scctx = iflib_get_softc_ctx(ctx); - /* Mark the queue as having work */ - if (txr->busy == EM_TX_IDLE) - txr->busy = EM_TX_BUSY; - - /* Send a copy of the frame to the BPF listener */ - ETHER_BPF_MTAP(ifp, m_head); + IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)"); + switch (adapter->hw.mac.type) { + case e1000_82571: + case e1000_82572: + case e1000_ich9lan: + case e1000_ich10lan: + case e1000_pch2lan: + case e1000_pch_lpt: + case e1000_pch_spt: + case e1000_82574: + case e1000_82583: + case e1000_80003es2lan: + /* 9K Jumbo Frame size */ + max_frame_size = 9234; + break; + case e1000_pchlan: + max_frame_size = 4096; + break; + case e1000_82542: + case e1000_ich8lan: + /* Adapters that do not support jumbo frames */ + max_frame_size = ETHER_MAX_LEN; + break; + default: + if (adapter->hw.mac.type >= igb_mac_min) + max_frame_size = 9234; + else /* lem */ + max_frame_size = MAX_JUMBO_FRAME_SIZE; + } + if (mtu > max_frame_size - ETHER_HDR_LEN - ETHER_CRC_LEN) { + return (EINVAL); } - return; + scctx->isc_max_frame_size = adapter->hw.mac.max_frame_size = + mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; + return (0); } -static void -em_start(if_t ifp) -{ - struct adapter *adapter = if_getsoftc(ifp); - struct tx_ring *txr = adapter->tx_rings; - - if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { - EM_TX_LOCK(txr); - em_start_locked(ifp, txr); - EM_TX_UNLOCK(txr); - } - return; -} -#else /* EM_MULTIQUEUE */ /********************************************************************* - * Multiqueue Transmit routines + * Init entry point + * + * This routine is used in two ways. It is used by the stack as + * init entry point in network interface structure. 
It is also used + * by the driver as a hw/sw initialization routine to get to a + * consistent state. * - * em_mq_start is called by the stack to initiate a transmit. - * however, if busy the driver can queue the request rather - * than do an immediate send. It is this that is an advantage - * in this driver, rather than also having multiple tx queues. + * return 0 on success, positive on failure **********************************************************************/ -/* -** Multiqueue capable stack interface -*/ -static int -em_mq_start(if_t ifp, struct mbuf *m) -{ - struct adapter *adapter = if_getsoftc(ifp); - struct tx_ring *txr = adapter->tx_rings; - unsigned int i, error; - if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) - i = m->m_pkthdr.flowid % adapter->num_queues; - else - i = curcpu % adapter->num_queues; +static void +em_if_init(if_ctx_t ctx) +{ + struct adapter *adapter = iflib_get_softc(ctx); + struct ifnet *ifp = iflib_get_ifp(ctx); + struct em_tx_queue *tx_que; + int i; + INIT_DEBUGOUT("em_if_init: begin"); - txr = &adapter->tx_rings[i]; + /* Get the latest mac address, User can use a LAA */ + bcopy(if_getlladdr(ifp), adapter->hw.mac.addr, + ETHER_ADDR_LEN); - error = drbr_enqueue(ifp, txr->br, m); - if (error) - return (error); + /* Put the address into the Receive Address Array */ + e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0); - if (EM_TX_TRYLOCK(txr)) { - em_mq_start_locked(ifp, txr); - EM_TX_UNLOCK(txr); - } else - taskqueue_enqueue(txr->tq, &txr->tx_task); + /* + * With the 82571 adapter, RAR[0] may be overwritten + * when the other port is reset, we make a duplicate + * in RAR[14] for that eventuality, this assures + * the interface continues to function. 
+ */ + if (adapter->hw.mac.type == e1000_82571) { + e1000_set_laa_state_82571(&adapter->hw, TRUE); + e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, + E1000_RAR_ENTRIES - 1); + } - return (0); -} -static int -em_mq_start_locked(if_t ifp, struct tx_ring *txr) -{ - struct adapter *adapter = txr->adapter; - struct mbuf *next; - int err = 0, enq = 0; + /* Initialize the hardware */ + em_reset(ctx); + em_if_update_admin_status(ctx); - EM_TX_LOCK_ASSERT(txr); + for (i = 0, tx_que = adapter->tx_queues; i < adapter->tx_num_queues; i++, tx_que++) { + struct tx_ring *txr = &tx_que->txr; - if (((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) || - adapter->link_active == 0) { - return (ENETDOWN); + txr->tx_rs_cidx = txr->tx_rs_pidx = txr->tx_cidx_processed = 0; } - /* Process the queue */ - while ((next = drbr_peek(ifp, txr->br)) != NULL) { - if ((err = em_xmit(txr, &next)) != 0) { - if (next == NULL) { - /* It was freed, move forward */ - drbr_advance(ifp, txr->br); - } else { - /* - * Still have one left, it may not be - * the same since the transmit function - * may have changed it. 
- */ - drbr_putback(ifp, txr->br, next); - } - break; - } - drbr_advance(ifp, txr->br); - enq++; - if_inc_counter(ifp, IFCOUNTER_OBYTES, next->m_pkthdr.len); - if (next->m_flags & M_MCAST) - if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); - ETHER_BPF_MTAP(ifp, next); - if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) - break; - } - - /* Mark the queue as having work */ - if ((enq > 0) && (txr->busy == EM_TX_IDLE)) - txr->busy = EM_TX_BUSY; - - if (txr->tx_avail < EM_MAX_SCATTER) - em_txeof(txr); - if (txr->tx_avail < EM_MAX_SCATTER) { - if_setdrvflagbits(ifp, IFF_DRV_OACTIVE,0); - } - return (err); -} - -/* -** Flush all ring buffers -*/ -static void -em_qflush(if_t ifp) -{ - struct adapter *adapter = if_getsoftc(ifp); - struct tx_ring *txr = adapter->tx_rings; - struct mbuf *m; - - for (int i = 0; i < adapter->num_queues; i++, txr++) { - EM_TX_LOCK(txr); - while ((m = buf_ring_dequeue_sc(txr->br)) != NULL) - m_freem(m); - EM_TX_UNLOCK(txr); - } - if_qflush(ifp); -} -#endif /* EM_MULTIQUEUE */ - -/********************************************************************* - * Ioctl entry point - * - * em_ioctl is called when the user wants to configure the - * interface. - * - * return 0 on success, positive on failure - **********************************************************************/ - -static int -em_ioctl(if_t ifp, u_long command, caddr_t data) -{ - struct adapter *adapter = if_getsoftc(ifp); - struct ifreq *ifr = (struct ifreq *)data; -#if defined(INET) || defined(INET6) - struct ifaddr *ifa = (struct ifaddr *)data; -#endif - bool avoid_reset = FALSE; - int error = 0; - - if (adapter->in_detach) - return (error); - - switch (command) { - case SIOCSIFADDR: -#ifdef INET - if (ifa->ifa_addr->sa_family == AF_INET) - avoid_reset = TRUE; -#endif -#ifdef INET6 - if (ifa->ifa_addr->sa_family == AF_INET6) - avoid_reset = TRUE; -#endif - /* - ** Calling init results in link renegotiation, - ** so we avoid doing it when possible. 
- */ - if (avoid_reset) { - if_setflagbits(ifp,IFF_UP,0); - if (!(if_getdrvflags(ifp)& IFF_DRV_RUNNING)) - em_init(adapter); -#ifdef INET - if (!(if_getflags(ifp) & IFF_NOARP)) - arp_ifinit(ifp, ifa); -#endif - } else - error = ether_ioctl(ifp, command, data); - break; - case SIOCSIFMTU: - { - int max_frame_size; - - IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)"); - - EM_CORE_LOCK(adapter); - switch (adapter->hw.mac.type) { - case e1000_82571: - case e1000_82572: - case e1000_ich9lan: - case e1000_ich10lan: - case e1000_pch2lan: - case e1000_pch_lpt: - case e1000_pch_spt: - case e1000_82574: - case e1000_82583: - case e1000_80003es2lan: /* 9K Jumbo Frame size */ - max_frame_size = 9234; - break; - case e1000_pchlan: - max_frame_size = 4096; - break; - /* Adapters that do not support jumbo frames */ - case e1000_ich8lan: - max_frame_size = ETHER_MAX_LEN; - break; - default: - max_frame_size = MAX_JUMBO_FRAME_SIZE; - } - if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN - - ETHER_CRC_LEN) { - EM_CORE_UNLOCK(adapter); - error = EINVAL; - break; - } - - if_setmtu(ifp, ifr->ifr_mtu); - adapter->hw.mac.max_frame_size = - if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN; - if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) - em_init_locked(adapter); - EM_CORE_UNLOCK(adapter); - break; - } - case SIOCSIFFLAGS: - IOCTL_DEBUGOUT("ioctl rcv'd:\ - SIOCSIFFLAGS (Set Interface Flags)"); - EM_CORE_LOCK(adapter); - if (if_getflags(ifp) & IFF_UP) { - if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { - if ((if_getflags(ifp) ^ adapter->if_flags) & - (IFF_PROMISC | IFF_ALLMULTI)) { - em_disable_promisc(adapter); - em_set_promisc(adapter); - } - } else - em_init_locked(adapter); - } else - if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) - em_stop(adapter); - adapter->if_flags = if_getflags(ifp); - EM_CORE_UNLOCK(adapter); - break; - case SIOCADDMULTI: - case SIOCDELMULTI: - IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI"); - if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { - 
EM_CORE_LOCK(adapter); - em_disable_intr(adapter); - em_set_multi(adapter); -#ifdef DEVICE_POLLING - if (!(if_getcapenable(ifp) & IFCAP_POLLING)) -#endif - em_enable_intr(adapter); - EM_CORE_UNLOCK(adapter); - } - break; - case SIOCSIFMEDIA: - /* Check SOL/IDER usage */ - EM_CORE_LOCK(adapter); - if (e1000_check_reset_block(&adapter->hw)) { - EM_CORE_UNLOCK(adapter); - device_printf(adapter->dev, "Media change is" - " blocked due to SOL/IDER session.\n"); - break; - } - EM_CORE_UNLOCK(adapter); - /* falls thru */ - case SIOCGIFMEDIA: - IOCTL_DEBUGOUT("ioctl rcv'd: \ - SIOCxIFMEDIA (Get/Set Interface Media)"); - error = ifmedia_ioctl(ifp, ifr, &adapter->media, command); - break; - case SIOCSIFCAP: - { - int mask, reinit; - - IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)"); - reinit = 0; - mask = ifr->ifr_reqcap ^ if_getcapenable(ifp); -#ifdef DEVICE_POLLING - if (mask & IFCAP_POLLING) { - if (ifr->ifr_reqcap & IFCAP_POLLING) { - error = ether_poll_register(em_poll, ifp); - if (error) - return (error); - EM_CORE_LOCK(adapter); - em_disable_intr(adapter); - if_setcapenablebit(ifp, IFCAP_POLLING, 0); - EM_CORE_UNLOCK(adapter); - } else { - error = ether_poll_deregister(ifp); - /* Enable interrupt even in error case */ - EM_CORE_LOCK(adapter); - em_enable_intr(adapter); - if_setcapenablebit(ifp, 0, IFCAP_POLLING); - EM_CORE_UNLOCK(adapter); - } - } -#endif - if (mask & IFCAP_HWCSUM) { - if_togglecapenable(ifp,IFCAP_HWCSUM); - reinit = 1; - } - if (mask & IFCAP_TSO4) { - if_togglecapenable(ifp,IFCAP_TSO4); - reinit = 1; - } - if (mask & IFCAP_VLAN_HWTAGGING) { - if_togglecapenable(ifp,IFCAP_VLAN_HWTAGGING); - reinit = 1; - } - if (mask & IFCAP_VLAN_HWFILTER) { - if_togglecapenable(ifp, IFCAP_VLAN_HWFILTER); - reinit = 1; - } - if (mask & IFCAP_VLAN_HWTSO) { - if_togglecapenable(ifp, IFCAP_VLAN_HWTSO); - reinit = 1; - } - if ((mask & IFCAP_WOL) && - (if_getcapabilities(ifp) & IFCAP_WOL) != 0) { - if (mask & IFCAP_WOL_MCAST) - if_togglecapenable(ifp, 
IFCAP_WOL_MCAST); - if (mask & IFCAP_WOL_MAGIC) - if_togglecapenable(ifp, IFCAP_WOL_MAGIC); - } - if (reinit && (if_getdrvflags(ifp) & IFF_DRV_RUNNING)) - em_init(adapter); - if_vlancap(ifp); - break; - } - - default: - error = ether_ioctl(ifp, command, data); - break; - } - - return (error); -} - - -/********************************************************************* - * Init entry point - * - * This routine is used in two ways. It is used by the stack as - * init entry point in network interface structure. It is also used - * by the driver as a hw/sw initialization routine to get to a - * consistent state. - * - * return 0 on success, positive on failure - **********************************************************************/ - -static void -em_init_locked(struct adapter *adapter) -{ - if_t ifp = adapter->ifp; - device_t dev = adapter->dev; - - INIT_DEBUGOUT("em_init: begin"); - - EM_CORE_LOCK_ASSERT(adapter); - - em_disable_intr(adapter); - callout_stop(&adapter->timer); - - /* Get the latest mac address, User can use a LAA */ - bcopy(if_getlladdr(adapter->ifp), adapter->hw.mac.addr, - ETHER_ADDR_LEN); - - /* Put the address into the Receive Address Array */ - e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0); - - /* - * With the 82571 adapter, RAR[0] may be overwritten - * when the other port is reset, we make a duplicate - * in RAR[14] for that eventuality, this assures - * the interface continues to function. 
- */ - if (adapter->hw.mac.type == e1000_82571) { - e1000_set_laa_state_82571(&adapter->hw, TRUE); - e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, - E1000_RAR_ENTRIES - 1); - } - - /* Initialize the hardware */ - em_reset(adapter); - em_update_link_status(adapter); - /* Setup VLAN support, basic and offload if available */ E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN); - /* Set hardware offload abilities */ - if_clearhwassist(ifp); - if (if_getcapenable(ifp) & IFCAP_TXCSUM) - if_sethwassistbits(ifp, CSUM_TCP | CSUM_UDP, 0); - - if (if_getcapenable(ifp) & IFCAP_TSO4) - if_sethwassistbits(ifp, CSUM_TSO, 0); + /* Clear bad data from Rx FIFOs */ + if (adapter->hw.mac.type >= igb_mac_min) + e1000_rx_fifo_flush_82575(&adapter->hw); /* Configure for OS presence */ em_init_manageability(adapter); /* Prepare transmit descriptors and buffers */ - em_setup_transmit_structures(adapter); - em_initialize_transmit_unit(adapter); + em_initialize_transmit_unit(ctx); /* Setup Multicast table */ - em_set_multi(adapter); + em_if_multi_set(ctx); /* - ** Figure out the desired mbuf - ** pool for doing jumbos - */ + * Figure out the desired mbuf + * pool for doing jumbos + */ if (adapter->hw.mac.max_frame_size <= 2048) adapter->rx_mbuf_sz = MCLBYTES; +#ifndef CONTIGMALLOC_WORKS + else + adapter->rx_mbuf_sz = MJUMPAGESIZE; +#else else if (adapter->hw.mac.max_frame_size <= 4096) adapter->rx_mbuf_sz = MJUMPAGESIZE; else adapter->rx_mbuf_sz = MJUM9BYTES; - - /* Prepare receive descriptors and buffers */ - if (em_setup_receive_structures(adapter)) { - device_printf(dev, "Could not setup receive structures\n"); - em_stop(adapter); - return; - } - em_initialize_receive_unit(adapter); +#endif + em_initialize_receive_unit(ctx); /* Use real VLAN Filter support? 
*/ if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) { @@ -1440,123 +1249,59 @@ em_init_locked(struct adapter *adapter) } /* Don't lose promiscuous settings */ - em_set_promisc(adapter); - - /* Set the interface as ACTIVE */ - if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE); - - callout_reset(&adapter->timer, hz, em_local_timer, adapter); + em_if_set_promisc(ctx, IFF_PROMISC); e1000_clear_hw_cntrs_base_generic(&adapter->hw); /* MSI/X configuration for 82574 */ if (adapter->hw.mac.type == e1000_82574) { - int tmp; - tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); + int tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); + tmp |= E1000_CTRL_EXT_PBA_CLR; E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp); /* Set the IVAR - interrupt vector routing. */ E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars); - } + } else if (adapter->intr_type == IFLIB_INTR_MSIX) /* Set up queue routing */ + igb_configure_queues(adapter); -#ifdef DEVICE_POLLING - /* - * Only enable interrupts if we are not polling, make sure - * they are off otherwise. 
- */ - if (if_getcapenable(ifp) & IFCAP_POLLING) - em_disable_intr(adapter); - else -#endif /* DEVICE_POLLING */ - em_enable_intr(adapter); + /* this clears any pending interrupts */ + E1000_READ_REG(&adapter->hw, E1000_ICR); + E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC); /* AMT based hardware can now take control from firmware */ if (adapter->has_manage && adapter->has_amt) em_get_hw_control(adapter); -} - -static void -em_init(void *arg) -{ - struct adapter *adapter = arg; - - EM_CORE_LOCK(adapter); - em_init_locked(adapter); - EM_CORE_UNLOCK(adapter); -} - - -#ifdef DEVICE_POLLING -/********************************************************************* - * - * Legacy polling routine: note this only works with single queue - * - *********************************************************************/ -static int -em_poll(if_t ifp, enum poll_cmd cmd, int count) -{ - struct adapter *adapter = if_getsoftc(ifp); - struct tx_ring *txr = adapter->tx_rings; - struct rx_ring *rxr = adapter->rx_rings; - u32 reg_icr; - int rx_done; - - EM_CORE_LOCK(adapter); - if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) { - EM_CORE_UNLOCK(adapter); - return (0); - } - if (cmd == POLL_AND_CHECK_STATUS) { - reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); - if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { - callout_stop(&adapter->timer); - adapter->hw.mac.get_link_status = 1; - em_update_link_status(adapter); - callout_reset(&adapter->timer, hz, - em_local_timer, adapter); - } + /* Set Energy Efficient Ethernet */ + if (adapter->hw.mac.type >= igb_mac_min && + adapter->hw.phy.media_type == e1000_media_type_copper) { + if (adapter->hw.mac.type == e1000_i354) + e1000_set_eee_i354(&adapter->hw, TRUE, TRUE); + else + e1000_set_eee_i350(&adapter->hw, TRUE, TRUE); } - EM_CORE_UNLOCK(adapter); - - em_rxeof(rxr, count, &rx_done); - - EM_TX_LOCK(txr); - em_txeof(txr); -#ifdef EM_MULTIQUEUE - if (!drbr_empty(ifp, txr->br)) - em_mq_start_locked(ifp, txr); -#else - if 
(!if_sendq_empty(ifp)) - em_start_locked(ifp, txr); -#endif - EM_TX_UNLOCK(txr); - - return (rx_done); } -#endif /* DEVICE_POLLING */ - /********************************************************************* * - * Fast Legacy/MSI Combined Interrupt Service routine + * Fast Legacy/MSI Combined Interrupt Service routine * *********************************************************************/ -static int -em_irq_fast(void *arg) +int +em_intr(void *arg) { - struct adapter *adapter = arg; - if_t ifp; - u32 reg_icr; - - ifp = adapter->ifp; + struct adapter *adapter = arg; + if_ctx_t ctx = adapter->ctx; + u32 reg_icr; reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); - /* Hot eject? */ + if (adapter->intr_type != IFLIB_INTR_LEGACY) + goto skip_stray; + /* Hot eject? */ if (reg_icr == 0xffffffff) return FILTER_STRAY; - /* Definitely not our interrupt. */ + /* Definitely not our interrupt. */ if (reg_icr == 0x0) return FILTER_STRAY; @@ -1568,80 +1313,67 @@ em_irq_fast(void *arg) (reg_icr & E1000_ICR_INT_ASSERTED) == 0) return FILTER_STRAY; - em_disable_intr(adapter); - taskqueue_enqueue(adapter->tq, &adapter->que_task); - +skip_stray: /* Link status change */ if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { adapter->hw.mac.get_link_status = 1; - taskqueue_enqueue(taskqueue_fast, &adapter->link_task); + iflib_admin_intr_deferred(ctx); } if (reg_icr & E1000_ICR_RXO) adapter->rx_overruns++; - return FILTER_HANDLED; + + return (FILTER_SCHEDULE_THREAD); } -/* Combined RX/TX handler, used by Legacy and MSI */ static void -em_handle_que(void *context, int pending) +igb_rx_enable_queue(struct adapter *adapter, struct em_rx_queue *rxq) { - struct adapter *adapter = context; - if_t ifp = adapter->ifp; - struct tx_ring *txr = adapter->tx_rings; - struct rx_ring *rxr = adapter->rx_rings; - - if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { - bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL); - - EM_TX_LOCK(txr); - em_txeof(txr); -#ifdef EM_MULTIQUEUE - if (!drbr_empty(ifp, 
txr->br)) - em_mq_start_locked(ifp, txr); -#else - if (!if_sendq_empty(ifp)) - em_start_locked(ifp, txr); -#endif - EM_TX_UNLOCK(txr); - if (more) { - taskqueue_enqueue(adapter->tq, &adapter->que_task); - return; - } - } + E1000_WRITE_REG(&adapter->hw, E1000_EIMS, rxq->eims); +} - em_enable_intr(adapter); - return; +static void +em_rx_enable_queue(struct adapter *adapter, struct em_rx_queue *rxq) +{ + E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxq->eims); } +static void +igb_tx_enable_queue(struct adapter *adapter, struct em_tx_queue *txq) +{ + E1000_WRITE_REG(&adapter->hw, E1000_EIMS, txq->eims); +} -/********************************************************************* - * - * MSIX Interrupt Service Routines - * - **********************************************************************/ static void -em_msix_tx(void *arg) +em_tx_enable_queue(struct adapter *adapter, struct em_tx_queue *txq) { - struct tx_ring *txr = arg; - struct adapter *adapter = txr->adapter; - if_t ifp = adapter->ifp; - - ++txr->tx_irq; - EM_TX_LOCK(txr); - em_txeof(txr); -#ifdef EM_MULTIQUEUE - if (!drbr_empty(ifp, txr->br)) - em_mq_start_locked(ifp, txr); -#else - if (!if_sendq_empty(ifp)) - em_start_locked(ifp, txr); -#endif + E1000_WRITE_REG(&adapter->hw, E1000_IMS, txq->eims); +} - /* Reenable this interrupt */ - E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims); - EM_TX_UNLOCK(txr); - return; +static int +em_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid) +{ + struct adapter *adapter = iflib_get_softc(ctx); + struct em_rx_queue *rxq = &adapter->rx_queues[rxqid]; + + if (adapter->hw.mac.type >= igb_mac_min) + igb_rx_enable_queue(adapter, rxq); + else + em_rx_enable_queue(adapter, rxq); + return (0); +} + +static int +em_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid) +{ + struct adapter *adapter = iflib_get_softc(ctx); + struct em_tx_queue *txq = &adapter->tx_queues[txqid]; + + if (adapter->hw.mac.type >= igb_mac_min) + igb_tx_enable_queue(adapter, txq); + else + 
em_tx_enable_queue(adapter, txq); + return (0); } /********************************************************************* @@ -1649,25 +1381,14 @@ em_msix_tx(void *arg) * MSIX RX Interrupt Service routine * **********************************************************************/ - -static void -em_msix_rx(void *arg) +static int +em_msix_que(void *arg) { - struct rx_ring *rxr = arg; - struct adapter *adapter = rxr->adapter; - bool more; + struct em_rx_queue *que = arg; - ++rxr->rx_irq; - if (!(if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING)) - return; - more = em_rxeof(rxr, adapter->rx_process_limit, NULL); - if (more) - taskqueue_enqueue(rxr->tq, &rxr->rx_task); - else { - /* Reenable this interrupt */ - E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims); - } - return; + ++que->irqs; + + return (FILTER_SCHEDULE_THREAD); } /********************************************************************* @@ -1675,103 +1396,50 @@ em_msix_rx(void *arg) * MSIX Link Fast Interrupt Service routine * **********************************************************************/ -static void +static int em_msix_link(void *arg) { - struct adapter *adapter = arg; - u32 reg_icr; + struct adapter *adapter = arg; + u32 reg_icr; ++adapter->link_irq; + MPASS(adapter->hw.back != NULL); reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); if (reg_icr & E1000_ICR_RXO) adapter->rx_overruns++; if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { - adapter->hw.mac.get_link_status = 1; - em_handle_link(adapter, 0); - } else + em_handle_link(adapter->ctx); + } else { E1000_WRITE_REG(&adapter->hw, E1000_IMS, - EM_MSIX_LINK | E1000_IMS_LSC); + EM_MSIX_LINK | E1000_IMS_LSC); + if (adapter->hw.mac.type >= igb_mac_min) + E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask); + } + /* - ** Because we must read the ICR for this interrupt - ** it may clear other causes using autoclear, for - ** this reason we simply create a soft interrupt - ** for all these vectors. 
- */ - if (reg_icr) { + * Because we must read the ICR for this interrupt + * it may clear other causes using autoclear, for + * this reason we simply create a soft interrupt + * for all these vectors. + */ + if (reg_icr && adapter->hw.mac.type < igb_mac_min) { E1000_WRITE_REG(&adapter->hw, E1000_ICS, adapter->ims); } - return; -} - -static void -em_handle_rx(void *context, int pending) -{ - struct rx_ring *rxr = context; - struct adapter *adapter = rxr->adapter; - bool more; - - more = em_rxeof(rxr, adapter->rx_process_limit, NULL); - if (more) - taskqueue_enqueue(rxr->tq, &rxr->rx_task); - else { - /* Reenable this interrupt */ - E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims); - } -} -static void -em_handle_tx(void *context, int pending) -{ - struct tx_ring *txr = context; - struct adapter *adapter = txr->adapter; - if_t ifp = adapter->ifp; - - EM_TX_LOCK(txr); - em_txeof(txr); -#ifdef EM_MULTIQUEUE - if (!drbr_empty(ifp, txr->br)) - em_mq_start_locked(ifp, txr); -#else - if (!if_sendq_empty(ifp)) - em_start_locked(ifp, txr); -#endif - E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims); - EM_TX_UNLOCK(txr); + return (FILTER_HANDLED); } static void -em_handle_link(void *context, int pending) +em_handle_link(void *context) { - struct adapter *adapter = context; - struct tx_ring *txr = adapter->tx_rings; - if_t ifp = adapter->ifp; - - if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING)) - return; + if_ctx_t ctx = context; + struct adapter *adapter = iflib_get_softc(ctx); - EM_CORE_LOCK(adapter); - callout_stop(&adapter->timer); - em_update_link_status(adapter); - callout_reset(&adapter->timer, hz, em_local_timer, adapter); - E1000_WRITE_REG(&adapter->hw, E1000_IMS, - EM_MSIX_LINK | E1000_IMS_LSC); - if (adapter->link_active) { - for (int i = 0; i < adapter->num_queues; i++, txr++) { - EM_TX_LOCK(txr); -#ifdef EM_MULTIQUEUE - if (!drbr_empty(ifp, txr->br)) - em_mq_start_locked(ifp, txr); -#else - if (if_sendq_empty(ifp)) - em_start_locked(ifp, txr); -#endif - 
EM_TX_UNLOCK(txr); - } - } - EM_CORE_UNLOCK(adapter); + adapter->hw.mac.get_link_status = 1; + iflib_admin_intr_deferred(ctx); } @@ -1784,21 +1452,19 @@ em_handle_link(void *context, int pending) * **********************************************************************/ static void -em_media_status(if_t ifp, struct ifmediareq *ifmr) +em_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr) { - struct adapter *adapter = if_getsoftc(ifp); + struct adapter *adapter = iflib_get_softc(ctx); u_char fiber_type = IFM_1000_SX; - INIT_DEBUGOUT("em_media_status: begin"); + INIT_DEBUGOUT("em_if_media_status: begin"); - EM_CORE_LOCK(adapter); - em_update_link_status(adapter); + iflib_admin_intr_deferred(ctx); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (!adapter->link_active) { - EM_CORE_UNLOCK(adapter); return; } @@ -1806,6 +1472,8 @@ em_media_status(if_t ifp, struct ifmediareq *ifmr) if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) { + if (adapter->hw.mac.type == e1000_82545) + fiber_type = IFM_1000_LX; ifmr->ifm_active |= fiber_type | IFM_FDX; } else { switch (adapter->link_speed) { @@ -1824,7 +1492,6 @@ em_media_status(if_t ifp, struct ifmediareq *ifmr) else ifmr->ifm_active |= IFM_HDX; } - EM_CORE_UNLOCK(adapter); } /********************************************************************* @@ -1836,17 +1503,16 @@ em_media_status(if_t ifp, struct ifmediareq *ifmr) * **********************************************************************/ static int -em_media_change(if_t ifp) +em_if_media_change(if_ctx_t ctx) { - struct adapter *adapter = if_getsoftc(ifp); - struct ifmedia *ifm = &adapter->media; + struct adapter *adapter = iflib_get_softc(ctx); + struct ifmedia *ifm = iflib_get_media(ctx); - INIT_DEBUGOUT("em_media_change: begin"); + INIT_DEBUGOUT("em_if_media_change: begin"); if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); - EM_CORE_LOCK(adapter); switch 
(IFM_SUBTYPE(ifm->ifm_media)) { case IFM_AUTO: adapter->hw.mac.autoneg = DO_AUTO_NEG; @@ -1878,361 +1544,45 @@ em_media_change(if_t ifp) device_printf(adapter->dev, "Unsupported media type\n"); } - em_init_locked(adapter); - EM_CORE_UNLOCK(adapter); + em_if_init(ctx); return (0); } -/********************************************************************* - * - * This routine maps the mbufs to tx descriptors. - * - * return 0 on success, positive on failure - **********************************************************************/ - static int -em_xmit(struct tx_ring *txr, struct mbuf **m_headp) +em_if_set_promisc(if_ctx_t ctx, int flags) { - struct adapter *adapter = txr->adapter; - bus_dma_segment_t segs[EM_MAX_SCATTER]; - bus_dmamap_t map; - struct em_txbuffer *tx_buffer, *tx_buffer_mapped; - struct e1000_tx_desc *ctxd = NULL; - struct mbuf *m_head; - struct ether_header *eh; - struct ip *ip = NULL; - struct tcphdr *tp = NULL; - u32 txd_upper = 0, txd_lower = 0; - int ip_off, poff; - int nsegs, i, j, first, last = 0; - int error; - bool do_tso, tso_desc, remap = TRUE; - - m_head = *m_headp; - do_tso = (m_head->m_pkthdr.csum_flags & CSUM_TSO); - tso_desc = FALSE; - ip_off = poff = 0; + struct adapter *adapter = iflib_get_softc(ctx); + u32 reg_rctl; - /* - * Intel recommends entire IP/TCP header length reside in a single - * buffer. If multiple descriptors are used to describe the IP and - * TCP header, each descriptor should describe one or more - * complete headers; descriptors referencing only parts of headers - * are not supported. If all layer headers are not coalesced into - * a single buffer, each buffer should not cross a 4KB boundary, - * or be larger than the maximum read request size. - * Controller also requires modifing IP/TCP header to make TSO work - * so we firstly get a writable mbuf chain then coalesce ethernet/ - * IP/TCP header into a single buffer to meet the requirement of - * controller. 
This also simplifies IP/TCP/UDP checksum offloading - * which also has similar restrictions. - */ - if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) { - if (do_tso || (m_head->m_next != NULL && - m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) { - if (M_WRITABLE(*m_headp) == 0) { - m_head = m_dup(*m_headp, M_NOWAIT); - m_freem(*m_headp); - if (m_head == NULL) { - *m_headp = NULL; - return (ENOBUFS); - } - *m_headp = m_head; - } - } - /* - * XXX - * Assume IPv4, we don't have TSO/checksum offload support - * for IPv6 yet. - */ - ip_off = sizeof(struct ether_header); - if (m_head->m_len < ip_off) { - m_head = m_pullup(m_head, ip_off); - if (m_head == NULL) { - *m_headp = NULL; - return (ENOBUFS); - } - } - eh = mtod(m_head, struct ether_header *); - if (eh->ether_type == htons(ETHERTYPE_VLAN)) { - ip_off = sizeof(struct ether_vlan_header); - if (m_head->m_len < ip_off) { - m_head = m_pullup(m_head, ip_off); - if (m_head == NULL) { - *m_headp = NULL; - return (ENOBUFS); - } - } - } - if (m_head->m_len < ip_off + sizeof(struct ip)) { - m_head = m_pullup(m_head, ip_off + sizeof(struct ip)); - if (m_head == NULL) { - *m_headp = NULL; - return (ENOBUFS); - } - } - ip = (struct ip *)(mtod(m_head, char *) + ip_off); - poff = ip_off + (ip->ip_hl << 2); - - if (do_tso || (m_head->m_pkthdr.csum_flags & CSUM_TCP)) { - if (m_head->m_len < poff + sizeof(struct tcphdr)) { - m_head = m_pullup(m_head, poff + - sizeof(struct tcphdr)); - if (m_head == NULL) { - *m_headp = NULL; - return (ENOBUFS); - } - } - tp = (struct tcphdr *)(mtod(m_head, char *) + poff); - /* - * TSO workaround: - * pull 4 more bytes of data into it. 
- */ - if (m_head->m_len < poff + (tp->th_off << 2)) { - m_head = m_pullup(m_head, poff + - (tp->th_off << 2) + - TSO_WORKAROUND); - if (m_head == NULL) { - *m_headp = NULL; - return (ENOBUFS); - } - } - ip = (struct ip *)(mtod(m_head, char *) + ip_off); - tp = (struct tcphdr *)(mtod(m_head, char *) + poff); - if (do_tso) { - ip->ip_len = htons(m_head->m_pkthdr.tso_segsz + - (ip->ip_hl << 2) + - (tp->th_off << 2)); - ip->ip_sum = 0; - /* - * The pseudo TCP checksum does not include TCP - * payload length so driver should recompute - * the checksum here what hardware expect to - * see. This is adherence of Microsoft's Large - * Send specification. - */ - tp->th_sum = in_pseudo(ip->ip_src.s_addr, - ip->ip_dst.s_addr, htons(IPPROTO_TCP)); - } - } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) { - if (m_head->m_len < poff + sizeof(struct udphdr)) { - m_head = m_pullup(m_head, poff + - sizeof(struct udphdr)); - if (m_head == NULL) { - *m_headp = NULL; - return (ENOBUFS); - } - } - ip = (struct ip *)(mtod(m_head, char *) + ip_off); - } - *m_headp = m_head; - } - - /* - * Map the packet for DMA - * - * Capture the first descriptor index, - * this descriptor will have the index - * of the EOP which is the only one that - * now gets a DONE bit writeback. - */ - first = txr->next_avail_desc; - tx_buffer = &txr->tx_buffers[first]; - tx_buffer_mapped = tx_buffer; - map = tx_buffer->map; - -retry: - error = bus_dmamap_load_mbuf_sg(txr->txtag, map, - *m_headp, segs, &nsegs, BUS_DMA_NOWAIT); - - /* - * There are two types of errors we can (try) to handle: - * - EFBIG means the mbuf chain was too long and bus_dma ran - * out of segments. Defragment the mbuf chain and try again. - * - ENOMEM means bus_dma could not obtain enough bounce buffers - * at this point in time. Defer sending and try again later. - * All other errors, in particular EINVAL, are fatal and prevent the - * mbuf chain from ever going through. Drop it and report error. 
- */ - if (error == EFBIG && remap) { - struct mbuf *m; - - m = m_collapse(*m_headp, M_NOWAIT, EM_MAX_SCATTER); - if (m == NULL) { - adapter->mbuf_defrag_failed++; - m_freem(*m_headp); - *m_headp = NULL; - return (ENOBUFS); - } - *m_headp = m; - - /* Try it again, but only once */ - remap = FALSE; - goto retry; - } else if (error != 0) { - adapter->no_tx_dma_setup++; - m_freem(*m_headp); - *m_headp = NULL; - return (error); - } - - /* - * TSO Hardware workaround, if this packet is not - * TSO, and is only a single descriptor long, and - * it follows a TSO burst, then we need to add a - * sentinel descriptor to prevent premature writeback. - */ - if ((!do_tso) && (txr->tx_tso == TRUE)) { - if (nsegs == 1) - tso_desc = TRUE; - txr->tx_tso = FALSE; - } - - if (txr->tx_avail < (nsegs + EM_MAX_SCATTER)) { - txr->no_desc_avail++; - bus_dmamap_unload(txr->txtag, map); - return (ENOBUFS); - } - m_head = *m_headp; - - /* Do hardware assists */ - if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { - em_tso_setup(txr, m_head, ip_off, ip, tp, - &txd_upper, &txd_lower); - /* we need to make a final sentinel transmit desc */ - tso_desc = TRUE; - } else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) - em_transmit_checksum_setup(txr, m_head, - ip_off, ip, &txd_upper, &txd_lower); - - if (m_head->m_flags & M_VLANTAG) { - /* Set the vlan id. 
*/ - txd_upper |= htole16(if_getvtag(m_head)) << 16; - /* Tell hardware to add tag */ - txd_lower |= htole32(E1000_TXD_CMD_VLE); - } - - i = txr->next_avail_desc; - - /* Set up our transmit descriptors */ - for (j = 0; j < nsegs; j++) { - bus_size_t seg_len; - bus_addr_t seg_addr; - - tx_buffer = &txr->tx_buffers[i]; - ctxd = &txr->tx_base[i]; - seg_addr = segs[j].ds_addr; - seg_len = segs[j].ds_len; - /* - ** TSO Workaround: - ** If this is the last descriptor, we want to - ** split it so we have a small final sentinel - */ - if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) { - seg_len -= TSO_WORKAROUND; - ctxd->buffer_addr = htole64(seg_addr); - ctxd->lower.data = htole32( - adapter->txd_cmd | txd_lower | seg_len); - ctxd->upper.data = htole32(txd_upper); - if (++i == adapter->num_tx_desc) - i = 0; - - /* Now make the sentinel */ - txr->tx_avail--; - ctxd = &txr->tx_base[i]; - tx_buffer = &txr->tx_buffers[i]; - ctxd->buffer_addr = - htole64(seg_addr + seg_len); - ctxd->lower.data = htole32( - adapter->txd_cmd | txd_lower | TSO_WORKAROUND); - ctxd->upper.data = - htole32(txd_upper); - last = i; - if (++i == adapter->num_tx_desc) - i = 0; - } else { - ctxd->buffer_addr = htole64(seg_addr); - ctxd->lower.data = htole32( - adapter->txd_cmd | txd_lower | seg_len); - ctxd->upper.data = htole32(txd_upper); - last = i; - if (++i == adapter->num_tx_desc) - i = 0; - } - tx_buffer->m_head = NULL; - tx_buffer->next_eop = -1; - } - - txr->next_avail_desc = i; - txr->tx_avail -= nsegs; - - tx_buffer->m_head = m_head; - /* - ** Here we swap the map so the last descriptor, - ** which gets the completion interrupt has the - ** real map, and the first descriptor gets the - ** unused map from this descriptor. 
- */ - tx_buffer_mapped->map = tx_buffer->map; - tx_buffer->map = map; - bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE); - - /* - * Last Descriptor of Packet - * needs End Of Packet (EOP) - * and Report Status (RS) - */ - ctxd->lower.data |= - htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS); - /* - * Keep track in the first buffer which - * descriptor will be written back - */ - tx_buffer = &txr->tx_buffers[first]; - tx_buffer->next_eop = last; - - /* - * Advance the Transmit Descriptor Tail (TDT), this tells the E1000 - * that this frame is available to transmit. - */ - bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i); - - return (0); -} - -static void -em_set_promisc(struct adapter *adapter) -{ - if_t ifp = adapter->ifp; - u32 reg_rctl; + em_disable_promisc(ctx); reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); - if (if_getflags(ifp) & IFF_PROMISC) { + if (flags & IFF_PROMISC) { reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE); /* Turn this on if you want to see bad packets */ if (em_debug_sbp) reg_rctl |= E1000_RCTL_SBP; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); - } else if (if_getflags(ifp) & IFF_ALLMULTI) { + } else if (flags & IFF_ALLMULTI) { reg_rctl |= E1000_RCTL_MPE; reg_rctl &= ~E1000_RCTL_UPE; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); } + return (0); } static void -em_disable_promisc(struct adapter *adapter) +em_disable_promisc(if_ctx_t ctx) { - if_t ifp = adapter->ifp; - u32 reg_rctl; - int mcnt = 0; + struct adapter *adapter = iflib_get_softc(ctx); + struct ifnet *ifp = iflib_get_ifp(ctx); + u32 reg_rctl; + int mcnt = 0; reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); - reg_rctl &= (~E1000_RCTL_UPE); + reg_rctl &= (~E1000_RCTL_UPE); if (if_getflags(ifp) & IFF_ALLMULTI) mcnt = MAX_NUM_MULTICAST_ADDRESSES; else @@ -2253,9 +1603,10 @@ em_disable_promisc(struct adapter *adapter) 
**********************************************************************/ static void -em_set_multi(struct adapter *adapter) +em_if_multi_set(if_ctx_t ctx) { - if_t ifp = adapter->ifp; + struct adapter *adapter = iflib_get_softc(ctx); + struct ifnet *ifp = iflib_get_ifp(ctx); u32 reg_rctl = 0; u8 *mta; /* Multicast array memory */ int mcnt = 0; @@ -2265,7 +1616,7 @@ em_set_multi(struct adapter *adapter) mta = adapter->mta; bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES); - if (adapter->hw.mac.type == e1000_82542 && + if (adapter->hw.mac.type == e1000_82542 && adapter->hw.revision_id == E1000_REVISION_2) { reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE) @@ -2284,7 +1635,7 @@ em_set_multi(struct adapter *adapter) } else e1000_update_mc_addr_list(&adapter->hw, mta, mcnt); - if (adapter->hw.mac.type == e1000_82542 && + if (adapter->hw.mac.type == e1000_82542 && adapter->hw.revision_id == E1000_REVISION_2) { reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); reg_rctl &= ~E1000_RCTL_RST; @@ -2304,17 +1655,17 @@ em_set_multi(struct adapter *adapter) **********************************************************************/ static void -em_local_timer(void *arg) +em_if_timer(if_ctx_t ctx, uint16_t qid) { - struct adapter *adapter = arg; - if_t ifp = adapter->ifp; - struct tx_ring *txr = adapter->tx_rings; - struct rx_ring *rxr = adapter->rx_rings; - u32 trigger = 0; + struct adapter *adapter = iflib_get_softc(ctx); + struct em_rx_queue *que; + int i; + int trigger = 0; - EM_CORE_LOCK_ASSERT(adapter); + if (qid != 0) + return; - em_update_link_status(adapter); + em_if_update_admin_status(ctx); em_update_stats_counters(adapter); /* Reset LAA into RAR[0] on 82571 */ @@ -2322,53 +1673,26 @@ em_local_timer(void *arg) e1000_get_laa_state_82571(&adapter->hw)) e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0); + if (adapter->hw.mac.type < em_mac_min) + lem_smartspeed(adapter); + /* Mask to use in 
the irq trigger */ - if (adapter->msix_mem) { - for (int i = 0; i < adapter->num_queues; i++, rxr++) - trigger |= rxr->ims; - rxr = adapter->rx_rings; - } else + if (adapter->intr_type == IFLIB_INTR_MSIX) { + for (i = 0, que = adapter->rx_queues; i < adapter->rx_num_queues; i++, que++) + trigger |= que->eims; + } else { trigger = E1000_ICS_RXDMT0; - - /* - ** Check on the state of the TX queue(s), this - ** can be done without the lock because its RO - ** and the HUNG state will be static if set. - */ - for (int i = 0; i < adapter->num_queues; i++, txr++) { - if (txr->busy == EM_TX_HUNG) - goto hung; - if (txr->busy >= EM_TX_MAXTRIES) - txr->busy = EM_TX_HUNG; - /* Schedule a TX tasklet if needed */ - if (txr->tx_avail <= EM_MAX_SCATTER) - taskqueue_enqueue(txr->tq, &txr->tx_task); } - - callout_reset(&adapter->timer, hz, em_local_timer, adapter); -#ifndef DEVICE_POLLING - /* Trigger an RX interrupt to guarantee mbuf refresh */ - E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger); -#endif - return; -hung: - /* Looks like we're hung */ - device_printf(adapter->dev, "Watchdog timeout Queue[%d]-- resetting\n", - txr->me); - em_print_debug_info(adapter); - if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING); - adapter->watchdog_events++; - em_init_locked(adapter); } static void -em_update_link_status(struct adapter *adapter) +em_if_update_admin_status(if_ctx_t ctx) { + struct adapter *adapter = iflib_get_softc(ctx); struct e1000_hw *hw = &adapter->hw; - if_t ifp = adapter->ifp; - device_t dev = adapter->dev; - struct tx_ring *txr = adapter->tx_rings; + struct ifnet *ifp = iflib_get_ifp(ctx); + device_t dev = iflib_get_dev(ctx); u32 link_check = 0; /* Get the cached link value or read phy for real */ @@ -2382,13 +1706,14 @@ em_update_link_status(struct adapter *adapter) link_check = !hw->mac.get_link_status; if (link_check) /* ESB2 fix */ e1000_cfg_on_link_up(hw); - } else + } else { link_check = TRUE; + } break; case e1000_media_type_fiber: e1000_check_for_link(hw); link_check = 
(E1000_READ_REG(hw, E1000_STATUS) & - E1000_STATUS_LU); + E1000_STATUS_LU); break; case e1000_media_type_internal_serdes: e1000_check_for_link(hw); @@ -2403,18 +1728,6 @@ em_update_link_status(struct adapter *adapter) if (link_check && (adapter->link_active == 0)) { e1000_get_speed_and_duplex(hw, &adapter->link_speed, &adapter->link_duplex); - /* - ** There have proven to be problems with TSO when not - ** at full gigabit speed, so disable the assist automatically - ** when at lower speeds. -jfv - */ - if (adapter->link_speed != SPEED_1000) { - if_sethwassistbits(ifp, 0, CSUM_TSO); - if_setcapenablebit(ifp, 0, IFCAP_TSO4); - if_setcapabilitiesbit(ifp, 0, IFCAP_TSO4); - - } - /* Check if we must disable SPEED_MODE bit on PCI-E */ if ((adapter->link_speed != SPEED_1000) && ((hw->mac.type == e1000_82571) || @@ -2432,7 +1745,8 @@ em_update_link_status(struct adapter *adapter) adapter->link_active = 1; adapter->smartspeed = 0; if_setbaudrate(ifp, adapter->link_speed * 1000000); - if_link_state_change(ifp, LINK_STATE_UP); + iflib_link_state_change(ctx, LINK_STATE_UP, ifp->if_baudrate); + printf("Link state changed to up\n"); } else if (!link_check && (adapter->link_active == 1)) { if_setbaudrate(ifp, 0); adapter->link_speed = 0; @@ -2440,11 +1754,11 @@ em_update_link_status(struct adapter *adapter) if (bootverbose) device_printf(dev, "Link is Down\n"); adapter->link_active = 0; - /* Link down, disable hang detection */ - for (int i = 0; i < adapter->num_queues; i++, txr++) - txr->busy = EM_TX_IDLE; - if_link_state_change(ifp, LINK_STATE_DOWN); + iflib_link_state_change(ctx, LINK_STATE_DOWN, ifp->if_baudrate); + printf("link state changed to down\n"); } + + E1000_WRITE_REG(&adapter->hw, E1000_IMS, EM_MSIX_LINK | E1000_IMS_LSC); } /********************************************************************* @@ -2457,35 +1771,15 @@ em_update_link_status(struct adapter *adapter) **********************************************************************/ static void -em_stop(void *arg) 
+em_if_stop(if_ctx_t ctx) { - struct adapter *adapter = arg; - if_t ifp = adapter->ifp; - struct tx_ring *txr = adapter->tx_rings; - - EM_CORE_LOCK_ASSERT(adapter); + struct adapter *adapter = iflib_get_softc(ctx); INIT_DEBUGOUT("em_stop: begin"); - em_disable_intr(adapter); - callout_stop(&adapter->timer); - - /* Tell the stack that the interface is no longer active */ - if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); - - /* Disarm Hang Detection. */ - for (int i = 0; i < adapter->num_queues; i++, txr++) { - EM_TX_LOCK(txr); - txr->busy = EM_TX_IDLE; - EM_TX_UNLOCK(txr); - } - - /* I219 needs some special flushing to avoid hangs */ - if (adapter->hw.mac.type == e1000_pch_spt) - em_flush_desc_rings(adapter); - e1000_reset_hw(&adapter->hw); - E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0); + if (adapter->hw.mac.type >= e1000_82544) + E1000_WRITE_REG(&adapter->hw, E1000_WUFC, 0); e1000_led_off(&adapter->hw); e1000_cleanup_led(&adapter->hw); @@ -2498,12 +1792,12 @@ em_stop(void *arg) * **********************************************************************/ static void -em_identify_hardware(struct adapter *adapter) +em_identify_hardware(if_ctx_t ctx) { - device_t dev = adapter->dev; + device_t dev = iflib_get_dev(ctx); + struct adapter *adapter = iflib_get_softc(ctx); /* Make sure our PCI config space has the necessary stuff set */ - pci_enable_busmaster(dev); adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2); /* Save off the information about this board */ @@ -2523,10 +1817,11 @@ em_identify_hardware(struct adapter *adapter) } static int -em_allocate_pci_resources(struct adapter *adapter) +em_allocate_pci_resources(if_ctx_t ctx) { - device_t dev = adapter->dev; - int rid; + struct adapter *adapter = iflib_get_softc(ctx); + device_t dev = iflib_get_dev(ctx); + int rid, val; rid = PCIR_BAR(0); adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY, @@ -2535,498 +1830,398 @@ em_allocate_pci_resources(struct adapter *adapter) 
device_printf(dev, "Unable to allocate bus resource: memory\n"); return (ENXIO); } - adapter->osdep.mem_bus_space_tag = - rman_get_bustag(adapter->memory); + adapter->osdep.mem_bus_space_tag = rman_get_bustag(adapter->memory); adapter->osdep.mem_bus_space_handle = rman_get_bushandle(adapter->memory); adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle; - adapter->hw.back = &adapter->osdep; - - return (0); -} - -/********************************************************************* - * - * Setup the Legacy or MSI Interrupt handler - * - **********************************************************************/ -int -em_allocate_legacy(struct adapter *adapter) -{ - device_t dev = adapter->dev; - struct tx_ring *txr = adapter->tx_rings; - int error, rid = 0; - - /* Manually turn off all interrupts */ - E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff); - - if (adapter->msix == 1) /* using MSI */ - rid = 1; - /* We allocate a single interrupt resource */ - adapter->res = bus_alloc_resource_any(dev, - SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); - if (adapter->res == NULL) { - device_printf(dev, "Unable to allocate bus resource: " - "interrupt\n"); - return (ENXIO); + /* Only older adapters use IO mapping */ + if (adapter->hw.mac.type < em_mac_min && + adapter->hw.mac.type > e1000_82543) { + /* Figure our where our IO BAR is ? 
*/ + for (rid = PCIR_BAR(0); rid < PCIR_CIS;) { + val = pci_read_config(dev, rid, 4); + if (EM_BAR_TYPE(val) == EM_BAR_TYPE_IO) { + adapter->io_rid = rid; + break; + } + rid += 4; + /* check for 64bit BAR */ + if (EM_BAR_MEM_TYPE(val) == EM_BAR_MEM_TYPE_64BIT) + rid += 4; + } + if (rid >= PCIR_CIS) { + device_printf(dev, "Unable to locate IO BAR\n"); + return (ENXIO); + } + adapter->ioport = bus_alloc_resource_any(dev, + SYS_RES_IOPORT, &adapter->io_rid, RF_ACTIVE); + if (adapter->ioport == NULL) { + device_printf(dev, "Unable to allocate bus resource: " + "ioport\n"); + return (ENXIO); + } + adapter->hw.io_base = 0; + adapter->osdep.io_bus_space_tag = + rman_get_bustag(adapter->ioport); + adapter->osdep.io_bus_space_handle = + rman_get_bushandle(adapter->ioport); } - /* - * Allocate a fast interrupt and the associated - * deferred processing contexts. - */ - TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter); - adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT, - taskqueue_thread_enqueue, &adapter->tq); - taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que", - device_get_nameunit(adapter->dev)); - /* Use a TX only tasklet for local timer */ - TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr); - txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT, - taskqueue_thread_enqueue, &txr->tq); - taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq", - device_get_nameunit(adapter->dev)); - TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter); - if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET, - em_irq_fast, NULL, adapter, &adapter->tag)) != 0) { - device_printf(dev, "Failed to register fast interrupt " - "handler: %d\n", error); - taskqueue_free(adapter->tq); - adapter->tq = NULL; - return (error); - } - + adapter->hw.back = &adapter->osdep; + return (0); } /********************************************************************* * * Setup the MSIX Interrupt handlers - * This is not really Multiqueue, rather - * its just separate interrupt 
vectors - * for TX, RX, and Link. * **********************************************************************/ -int -em_allocate_msix(struct adapter *adapter) +static int +em_if_msix_intr_assign(if_ctx_t ctx, int msix) { - device_t dev = adapter->dev; - struct tx_ring *txr = adapter->tx_rings; - struct rx_ring *rxr = adapter->rx_rings; - int error, rid, vector = 0; - int cpu_id = 0; - - - /* Make sure all interrupts are disabled */ - E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff); + struct adapter *adapter = iflib_get_softc(ctx); + struct em_rx_queue *rx_que = adapter->rx_queues; + struct em_tx_queue *tx_que = adapter->tx_queues; + int error, rid, i, vector = 0, rx_vectors; + char buf[16]; /* First set up ring resources */ - for (int i = 0; i < adapter->num_queues; i++, rxr++, vector++) { - - /* RX ring */ + for (i = 0; i < adapter->rx_num_queues; i++, rx_que++, vector++) { rid = vector + 1; - - rxr->res = bus_alloc_resource_any(dev, - SYS_RES_IRQ, &rid, RF_ACTIVE); - if (rxr->res == NULL) { - device_printf(dev, - "Unable to allocate bus resource: " - "RX MSIX Interrupt %d\n", i); - return (ENXIO); - } - if ((error = bus_setup_intr(dev, rxr->res, - INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx, - rxr, &rxr->tag)) != 0) { - device_printf(dev, "Failed to register RX handler"); - return (error); + snprintf(buf, sizeof(buf), "rxq%d", i); + error = iflib_irq_alloc_generic(ctx, &rx_que->que_irq, rid, IFLIB_INTR_RXTX, em_msix_que, rx_que, rx_que->me, buf); + if (error) { + device_printf(iflib_get_dev(ctx), "Failed to allocate que int %d err: %d", i, error); + adapter->rx_num_queues = i + 1; + goto fail; } -#if __FreeBSD_version >= 800504 - bus_describe_intr(dev, rxr->res, rxr->tag, "rx%d", i); -#endif - rxr->msix = vector; - - if (em_last_bind_cpu < 0) - em_last_bind_cpu = CPU_FIRST(); - cpu_id = em_last_bind_cpu; - bus_bind_intr(dev, rxr->res, cpu_id); - - TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr); - rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT, - 
taskqueue_thread_enqueue, &rxr->tq); - taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq (cpuid %d)", - device_get_nameunit(adapter->dev), cpu_id); - /* - ** Set the bit to enable interrupt - ** in E1000_IMS -- bits 20 and 21 - ** are for RX0 and RX1, note this has - ** NOTHING to do with the MSIX vector - */ - rxr->ims = 1 << (20 + i); - adapter->ims |= rxr->ims; - adapter->ivars |= (8 | rxr->msix) << (i * 4); - em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu); + rx_que->msix = vector; + + /* + * Set the bit to enable interrupt + * in E1000_IMS -- bits 20 and 21 + * are for RX0 and RX1, note this has + * NOTHING to do with the MSIX vector + */ + if (adapter->hw.mac.type == e1000_82574) { + rx_que->eims = 1 << (20 + i); + adapter->ims |= rx_que->eims; + adapter->ivars |= (8 | rx_que->msix) << (i * 4); + } else if (adapter->hw.mac.type == e1000_82575) + rx_que->eims = E1000_EICR_TX_QUEUE0 << vector; + else + rx_que->eims = 1 << vector; } + rx_vectors = vector; - for (int i = 0; i < adapter->num_queues; i++, txr++, vector++) { - /* TX ring */ + vector = 0; + for (i = 0; i < adapter->tx_num_queues; i++, tx_que++, vector++) { rid = vector + 1; - txr->res = bus_alloc_resource_any(dev, - SYS_RES_IRQ, &rid, RF_ACTIVE); - if (txr->res == NULL) { - device_printf(dev, - "Unable to allocate bus resource: " - "TX MSIX Interrupt %d\n", i); - return (ENXIO); - } - if ((error = bus_setup_intr(dev, txr->res, - INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx, - txr, &txr->tag)) != 0) { - device_printf(dev, "Failed to register TX handler"); - return (error); - } -#if __FreeBSD_version >= 800504 - bus_describe_intr(dev, txr->res, txr->tag, "tx%d", i); -#endif - txr->msix = vector; - - if (em_last_bind_cpu < 0) - em_last_bind_cpu = CPU_FIRST(); - cpu_id = em_last_bind_cpu; - bus_bind_intr(dev, txr->res, cpu_id); - - TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr); - txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT, - taskqueue_thread_enqueue, &txr->tq); - 
taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq (cpuid %d)", - device_get_nameunit(adapter->dev), cpu_id); - /* - ** Set the bit to enable interrupt - ** in E1000_IMS -- bits 22 and 23 - ** are for TX0 and TX1, note this has - ** NOTHING to do with the MSIX vector - */ - txr->ims = 1 << (22 + i); - adapter->ims |= txr->ims; - adapter->ivars |= (8 | txr->msix) << (8 + (i * 4)); + snprintf(buf, sizeof(buf), "txq%d", i); + tx_que = &adapter->tx_queues[i]; + iflib_softirq_alloc_generic(ctx, rid, IFLIB_INTR_TX, tx_que, tx_que->me, buf); + + tx_que->msix = (vector % adapter->tx_num_queues); - em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu); + /* + * Set the bit to enable interrupt + * in E1000_IMS -- bits 22 and 23 + * are for TX0 and TX1, note this has + * NOTHING to do with the MSIX vector + */ + if (adapter->hw.mac.type == e1000_82574) { + tx_que->eims = 1 << (22 + i); + adapter->ims |= tx_que->eims; + adapter->ivars |= (8 | tx_que->msix) << (8 + (i * 4)); + } else if (adapter->hw.mac.type == e1000_82575) { + tx_que->eims = E1000_EICR_TX_QUEUE0 << (i % adapter->tx_num_queues); + } else { + tx_que->eims = 1 << (i % adapter->tx_num_queues); + } } /* Link interrupt */ - rid = vector + 1; - adapter->res = bus_alloc_resource_any(dev, - SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); - if (!adapter->res) { - device_printf(dev,"Unable to allocate " - "bus resource: Link interrupt [%d]\n", rid); - return (ENXIO); - } - /* Set the link handler function */ - error = bus_setup_intr(dev, adapter->res, - INTR_TYPE_NET | INTR_MPSAFE, NULL, - em_msix_link, adapter, &adapter->tag); + rid = rx_vectors + 1; + error = iflib_irq_alloc_generic(ctx, &adapter->irq, rid, IFLIB_INTR_ADMIN, em_msix_link, adapter, 0, "aq"); + if (error) { - adapter->res = NULL; - device_printf(dev, "Failed to register LINK handler"); - return (error); + device_printf(iflib_get_dev(ctx), "Failed to register admin handler"); + goto fail; + } + adapter->linkvec = rx_vectors; + if (adapter->hw.mac.type < 
igb_mac_min) { + adapter->ivars |= (8 | rx_vectors) << 16; + adapter->ivars |= 0x80000000; } -#if __FreeBSD_version >= 800504 - bus_describe_intr(dev, adapter->res, adapter->tag, "link"); -#endif - adapter->linkvec = vector; - adapter->ivars |= (8 | vector) << 16; - adapter->ivars |= 0x80000000; - return (0); +fail: + iflib_irq_free(ctx, &adapter->irq); + rx_que = adapter->rx_queues; + for (int i = 0; i < adapter->rx_num_queues; i++, rx_que++) + iflib_irq_free(ctx, &rx_que->que_irq); + return (error); } - static void -em_free_pci_resources(struct adapter *adapter) +igb_configure_queues(struct adapter *adapter) { - device_t dev = adapter->dev; - struct tx_ring *txr; - struct rx_ring *rxr; - int rid; + struct e1000_hw *hw = &adapter->hw; + struct em_rx_queue *rx_que; + struct em_tx_queue *tx_que; + u32 tmp, ivar = 0, newitr = 0; + /* First turn on RSS capability */ + if (adapter->hw.mac.type != e1000_82575) + E1000_WRITE_REG(hw, E1000_GPIE, + E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME | + E1000_GPIE_PBA | E1000_GPIE_NSICR); - /* - ** Release all the queue interrupt resources: - */ - for (int i = 0; i < adapter->num_queues; i++) { - txr = &adapter->tx_rings[i]; - /* an early abort? 
*/ - if (txr == NULL) - break; - rid = txr->msix +1; - if (txr->tag != NULL) { - bus_teardown_intr(dev, txr->res, txr->tag); - txr->tag = NULL; + /* Turn on MSIX */ + switch (adapter->hw.mac.type) { + case e1000_82580: + case e1000_i350: + case e1000_i354: + case e1000_i210: + case e1000_i211: + case e1000_vfadapt: + case e1000_vfadapt_i350: + /* RX entries */ + for (int i = 0; i < adapter->rx_num_queues; i++) { + u32 index = i >> 1; + ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); + rx_que = &adapter->rx_queues[i]; + if (i & 1) { + ivar &= 0xFF00FFFF; + ivar |= (rx_que->msix | E1000_IVAR_VALID) << 16; + } else { + ivar &= 0xFFFFFF00; + ivar |= rx_que->msix | E1000_IVAR_VALID; + } + E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); + } + /* TX entries */ + for (int i = 0; i < adapter->tx_num_queues; i++) { + u32 index = i >> 1; + ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); + tx_que = &adapter->tx_queues[i]; + if (i & 1) { + ivar &= 0x00FFFFFF; + ivar |= (tx_que->msix | E1000_IVAR_VALID) << 24; + } else { + ivar &= 0xFFFF00FF; + ivar |= (tx_que->msix | E1000_IVAR_VALID) << 8; + } + E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); + adapter->que_mask |= tx_que->eims; } - if (txr->res != NULL) - bus_release_resource(dev, SYS_RES_IRQ, - rid, txr->res); - rxr = &adapter->rx_rings[i]; - /* an early abort? 
*/ - if (rxr == NULL) - break; - rid = rxr->msix +1; - if (rxr->tag != NULL) { - bus_teardown_intr(dev, rxr->res, rxr->tag); - rxr->tag = NULL; + /* And for the link interrupt */ + ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8; + adapter->link_mask = 1 << adapter->linkvec; + E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar); + break; + case e1000_82576: + /* RX entries */ + for (int i = 0; i < adapter->rx_num_queues; i++) { + u32 index = i & 0x7; /* Each IVAR has two entries */ + ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); + rx_que = &adapter->rx_queues[i]; + if (i < 8) { + ivar &= 0xFFFFFF00; + ivar |= rx_que->msix | E1000_IVAR_VALID; + } else { + ivar &= 0xFF00FFFF; + ivar |= (rx_que->msix | E1000_IVAR_VALID) << 16; + } + E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); + adapter->que_mask |= rx_que->eims; + } + /* TX entries */ + for (int i = 0; i < adapter->tx_num_queues; i++) { + u32 index = i & 0x7; /* Each IVAR has two entries */ + ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); + tx_que = &adapter->tx_queues[i]; + if (i < 8) { + ivar &= 0xFFFF00FF; + ivar |= (tx_que->msix | E1000_IVAR_VALID) << 8; + } else { + ivar &= 0x00FFFFFF; + ivar |= (tx_que->msix | E1000_IVAR_VALID) << 24; + } + E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); + adapter->que_mask |= tx_que->eims; + } + + /* And for the link interrupt */ + ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8; + adapter->link_mask = 1 << adapter->linkvec; + E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar); + break; + + case e1000_82575: + /* enable MSI-X support*/ + tmp = E1000_READ_REG(hw, E1000_CTRL_EXT); + tmp |= E1000_CTRL_EXT_PBA_CLR; + /* Auto-Mask interrupts upon ICR read. 
*/ + tmp |= E1000_CTRL_EXT_EIAME; + tmp |= E1000_CTRL_EXT_IRCA; + E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp); + + /* Queues */ + for (int i = 0; i < adapter->rx_num_queues; i++) { + rx_que = &adapter->rx_queues[i]; + tmp = E1000_EICR_RX_QUEUE0 << i; + tmp |= E1000_EICR_TX_QUEUE0 << i; + rx_que->eims = tmp; + E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0), + i, rx_que->eims); + adapter->que_mask |= rx_que->eims; } - if (rxr->res != NULL) - bus_release_resource(dev, SYS_RES_IRQ, - rid, rxr->res); + + /* Link */ + E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec), + E1000_EIMS_OTHER); + adapter->link_mask |= E1000_EIMS_OTHER; + default: + break; } - if (adapter->linkvec) /* we are doing MSIX */ - rid = adapter->linkvec + 1; - else - (adapter->msix != 0) ? (rid = 1):(rid = 0); + /* Set the starting interrupt rate */ + if (em_max_interrupt_rate > 0) + newitr = (4000000 / em_max_interrupt_rate) & 0x7FFC; + + if (hw->mac.type == e1000_82575) + newitr |= newitr << 16; + else + newitr |= E1000_EITR_CNT_IGNR; - if (adapter->tag != NULL) { - bus_teardown_intr(dev, adapter->res, adapter->tag); - adapter->tag = NULL; + for (int i = 0; i < adapter->rx_num_queues; i++) { + rx_que = &adapter->rx_queues[i]; + E1000_WRITE_REG(hw, E1000_EITR(rx_que->msix), newitr); } - if (adapter->res != NULL) - bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res); + return; +} +static void +em_free_pci_resources(if_ctx_t ctx) +{ + struct adapter *adapter = iflib_get_softc(ctx); + struct em_rx_queue *que = adapter->rx_queues; + device_t dev = iflib_get_dev(ctx); - if (adapter->msix) - pci_release_msi(dev); + /* Release all msix queue resources */ + if (adapter->intr_type == IFLIB_INTR_MSIX) + iflib_irq_free(ctx, &adapter->irq); - if (adapter->msix_mem != NULL) - bus_release_resource(dev, SYS_RES_MEMORY, - PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem); + for (int i = 0; i < adapter->rx_num_queues; i++, que++) { + iflib_irq_free(ctx, &que->que_irq); + } - if (adapter->memory != NULL) + /* First release all 
the interrupt resources */ + if (adapter->memory != NULL) { bus_release_resource(dev, SYS_RES_MEMORY, - PCIR_BAR(0), adapter->memory); + PCIR_BAR(0), adapter->memory); + adapter->memory = NULL; + } - if (adapter->flash != NULL) + if (adapter->flash != NULL) { bus_release_resource(dev, SYS_RES_MEMORY, - EM_FLASH, adapter->flash); + EM_FLASH, adapter->flash); + adapter->flash = NULL; + } + if (adapter->ioport != NULL) + bus_release_resource(dev, SYS_RES_IOPORT, + adapter->io_rid, adapter->ioport); } -/* - * Setup MSI or MSI/X - */ +/* Setup MSI or MSI/X */ static int -em_setup_msix(struct adapter *adapter) +em_setup_msix(if_ctx_t ctx) { - device_t dev = adapter->dev; - int val; - - /* Nearly always going to use one queue */ - adapter->num_queues = 1; - - /* - ** Try using MSI-X for Hartwell adapters - */ - if ((adapter->hw.mac.type == e1000_82574) && - (em_enable_msix == TRUE)) { -#ifdef EM_MULTIQUEUE - adapter->num_queues = (em_num_queues == 1) ? 1 : 2; - if (adapter->num_queues > 1) - em_enable_vectors_82574(adapter); -#endif - /* Map the MSIX BAR */ - int rid = PCIR_BAR(EM_MSIX_BAR); - adapter->msix_mem = bus_alloc_resource_any(dev, - SYS_RES_MEMORY, &rid, RF_ACTIVE); - if (adapter->msix_mem == NULL) { - /* May not be enabled */ - device_printf(adapter->dev, - "Unable to map MSIX table \n"); - goto msi; - } - val = pci_msix_count(dev); - -#ifdef EM_MULTIQUEUE - /* We need 5 vectors in the multiqueue case */ - if (adapter->num_queues > 1 ) { - if (val >= 5) - val = 5; - else { - adapter->num_queues = 1; - device_printf(adapter->dev, - "Insufficient MSIX vectors for >1 queue, " - "using single queue...\n"); - goto msix_one; - } - } else { -msix_one: -#endif - if (val >= 3) - val = 3; - else { - device_printf(adapter->dev, - "Insufficient MSIX vectors, using MSI\n"); - goto msi; - } -#ifdef EM_MULTIQUEUE - } -#endif + struct adapter *adapter = iflib_get_softc(ctx); - if ((pci_alloc_msix(dev, &val) == 0)) { - device_printf(adapter->dev, - "Using MSIX interrupts " - 
"with %d vectors\n", val); - return (val); - } - - /* - ** If MSIX alloc failed or provided us with - ** less than needed, free and fall through to MSI - */ - pci_release_msi(dev); - } -msi: - if (adapter->msix_mem != NULL) { - bus_release_resource(dev, SYS_RES_MEMORY, - PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem); - adapter->msix_mem = NULL; + if (adapter->hw.mac.type == e1000_82574) { + em_enable_vectors_82574(ctx); } - val = 1; - if (pci_alloc_msi(dev, &val) == 0) { - device_printf(adapter->dev, "Using an MSI interrupt\n"); - return (val); - } - /* Should only happen due to manual configuration */ - device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n"); return (0); } +/********************************************************************* + * + * Initialize the hardware to a configuration + * as specified by the adapter structure. + * + **********************************************************************/ -/* -** The 3 following flush routines are used as a workaround in the -** I219 client parts and only for them. -** -** em_flush_tx_ring - remove all descriptors from the tx_ring -** -** We want to clear all pending descriptors from the TX ring. -** zeroing happens when the HW reads the regs. We assign the ring itself as -** the data of the next descriptor. We don't care about the data we are about -** to reset the HW. 
-*/ -static void -em_flush_tx_ring(struct adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - struct tx_ring *txr = adapter->tx_rings; - struct e1000_tx_desc *txd; - u32 tctl, txd_lower = E1000_TXD_CMD_IFCS; - u16 size = 512; - - tctl = E1000_READ_REG(hw, E1000_TCTL); - E1000_WRITE_REG(hw, E1000_TCTL, tctl | E1000_TCTL_EN); - - txd = &txr->tx_base[txr->next_avail_desc++]; - if (txr->next_avail_desc == adapter->num_tx_desc) - txr->next_avail_desc = 0; - - /* Just use the ring as a dummy buffer addr */ - txd->buffer_addr = txr->txdma.dma_paddr; - txd->lower.data = htole32(txd_lower | size); - txd->upper.data = 0; - - /* flush descriptors to memory before notifying the HW */ - wmb(); - - E1000_WRITE_REG(hw, E1000_TDT(0), txr->next_avail_desc); - mb(); - usec_delay(250); -} - -/* -** em_flush_rx_ring - remove all descriptors from the rx_ring -** -** Mark all descriptors in the RX ring as consumed and disable the rx ring -*/ static void -em_flush_rx_ring(struct adapter *adapter) +lem_smartspeed(struct adapter *adapter) { - struct e1000_hw *hw = &adapter->hw; - u32 rctl, rxdctl; - - rctl = E1000_READ_REG(hw, E1000_RCTL); - E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); - E1000_WRITE_FLUSH(hw); - usec_delay(150); + u16 phy_tmp; - rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0)); - /* zero the lower 14 bits (prefetch and host thresholds) */ - rxdctl &= 0xffffc000; - /* - * update thresholds: prefetch threshold to 31, host threshold to 1 - * and make sure the granularity is "descriptors" and not "cache lines" - */ - rxdctl |= (0x1F | (1 << 8) | E1000_RXDCTL_THRESH_UNIT_DESC); - E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl); - - /* momentarily enable the RX ring for the changes to take effect */ - E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN); - E1000_WRITE_FLUSH(hw); - usec_delay(150); - E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); -} - -/* -** em_flush_desc_rings - remove all descriptors from the descriptor rings -** -** In i219, the 
descriptor rings must be emptied before resetting the HW -** or before changing the device state to D3 during runtime (runtime PM). -** -** Failure to do this will cause the HW to enter a unit hang state which can -** only be released by PCI reset on the device -** -*/ -static void -em_flush_desc_rings(struct adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - device_t dev = adapter->dev; - u16 hang_state; - u32 fext_nvm11, tdlen; - - /* First, disable MULR fix in FEXTNVM11 */ - fext_nvm11 = E1000_READ_REG(hw, E1000_FEXTNVM11); - fext_nvm11 |= E1000_FEXTNVM11_DISABLE_MULR_FIX; - E1000_WRITE_REG(hw, E1000_FEXTNVM11, fext_nvm11); - - /* do nothing if we're not in faulty state, or if the queue is empty */ - tdlen = E1000_READ_REG(hw, E1000_TDLEN(0)); - hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2); - if (!(hang_state & FLUSH_DESC_REQUIRED) || !tdlen) + if (adapter->link_active || (adapter->hw.phy.type != e1000_phy_igp) || + adapter->hw.mac.autoneg == 0 || + (adapter->hw.phy.autoneg_advertised & ADVERTISE_1000_FULL) == 0) return; - em_flush_tx_ring(adapter); - /* recheck, maybe the fault is caused by the rx ring */ - hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2); - if (hang_state & FLUSH_DESC_REQUIRED) - em_flush_rx_ring(adapter); + if (adapter->smartspeed == 0) { + /* If Master/Slave config fault is asserted twice, + * we assume back-to-back */ + e1000_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp); + if (!(phy_tmp & SR_1000T_MS_CONFIG_FAULT)) + return; + e1000_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp); + if (phy_tmp & SR_1000T_MS_CONFIG_FAULT) { + e1000_read_phy_reg(&adapter->hw, + PHY_1000T_CTRL, &phy_tmp); + if(phy_tmp & CR_1000T_MS_ENABLE) { + phy_tmp &= ~CR_1000T_MS_ENABLE; + e1000_write_phy_reg(&adapter->hw, + PHY_1000T_CTRL, phy_tmp); + adapter->smartspeed++; + if(adapter->hw.mac.autoneg && + !e1000_copper_link_autoneg(&adapter->hw) && + !e1000_read_phy_reg(&adapter->hw, + PHY_CONTROL, &phy_tmp)) { + 
phy_tmp |= (MII_CR_AUTO_NEG_EN | + MII_CR_RESTART_AUTO_NEG); + e1000_write_phy_reg(&adapter->hw, + PHY_CONTROL, phy_tmp); + } + } + } + return; + } else if(adapter->smartspeed == EM_SMARTSPEED_DOWNSHIFT) { + /* If still no link, perhaps using 2/3 pair cable */ + e1000_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp); + phy_tmp |= CR_1000T_MS_ENABLE; + e1000_write_phy_reg(&adapter->hw, PHY_1000T_CTRL, phy_tmp); + if(adapter->hw.mac.autoneg && + !e1000_copper_link_autoneg(&adapter->hw) && + !e1000_read_phy_reg(&adapter->hw, PHY_CONTROL, &phy_tmp)) { + phy_tmp |= (MII_CR_AUTO_NEG_EN | + MII_CR_RESTART_AUTO_NEG); + e1000_write_phy_reg(&adapter->hw, PHY_CONTROL, phy_tmp); + } + } + /* Restart process after EM_SMARTSPEED_MAX iterations */ + if(adapter->smartspeed++ == EM_SMARTSPEED_MAX) + adapter->smartspeed = 0; } -/********************************************************************* - * - * Initialize the hardware to a configuration - * as specified by the adapter structure. - * - **********************************************************************/ static void -em_reset(struct adapter *adapter) +em_reset(if_ctx_t ctx) { - device_t dev = adapter->dev; - if_t ifp = adapter->ifp; - struct e1000_hw *hw = &adapter->hw; - u16 rx_buffer_size; - u32 pba; + device_t dev = iflib_get_dev(ctx); + struct adapter *adapter = iflib_get_softc(ctx); + struct ifnet *ifp = iflib_get_ifp(ctx); + struct e1000_hw *hw = &adapter->hw; + u16 rx_buffer_size; + u32 pba; INIT_DEBUGOUT("em_reset: begin"); @@ -3077,13 +2272,62 @@ em_reset(struct adapter *adapter) case e1000_pch_spt: pba = E1000_PBA_26K; break; + case e1000_82575: + pba = E1000_PBA_32K; + break; + case e1000_82576: + case e1000_vfadapt: + pba = E1000_READ_REG(hw, E1000_RXPBS); + pba &= E1000_RXPBS_SIZE_MASK_82576; + break; + case e1000_82580: + case e1000_i350: + case e1000_i354: + case e1000_vfadapt_i350: + pba = E1000_READ_REG(hw, E1000_RXPBS); + pba = e1000_rxpbs_adjust_82580(pba); + break; + case e1000_i210: + case 
e1000_i211: + pba = E1000_PBA_34K; + break; default: if (adapter->hw.mac.max_frame_size > 8192) pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */ else pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */ } - E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba); + + /* Special needs in case of Jumbo frames */ + if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) { + u32 tx_space, min_tx, min_rx; + pba = E1000_READ_REG(hw, E1000_PBA); + tx_space = pba >> 16; + pba &= 0xffff; + min_tx = (adapter->hw.mac.max_frame_size + + sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2; + min_tx = roundup2(min_tx, 1024); + min_tx >>= 10; + min_rx = adapter->hw.mac.max_frame_size; + min_rx = roundup2(min_rx, 1024); + min_rx >>= 10; + if (tx_space < min_tx && + ((min_tx - tx_space) < pba)) { + pba = pba - (min_tx - tx_space); + /* + * if short on rx space, rx wins + * and must trump tx adjustment + */ + if (pba < min_rx) + pba = min_rx; + } + E1000_WRITE_REG(hw, E1000_PBA, pba); + } + + if (hw->mac.type < igb_mac_min) + E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba); + + INIT_DEBUGOUT1("em_reset: pba=%dK",pba); /* * These parameters control the automatic generation (Tx) and @@ -3099,7 +2343,7 @@ em_reset(struct adapter *adapter) * by 1500. * - The pause time is fairly large at 1000 x 512ns = 512 usec. 
*/ - rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 ); + rx_buffer_size = (pba & 0xffff) << 10; hw->fc.high_water = rx_buffer_size - roundup2(adapter->hw.mac.max_frame_size, 1024); hw->fc.low_water = hw->fc.high_water - 1500; @@ -3120,7 +2364,7 @@ em_reset(struct adapter *adapter) switch (hw->mac.type) { case e1000_pchlan: /* Workaround: no TX flow ctrl for PCH */ - hw->fc.requested_mode = e1000_fc_rx_pause; + hw->fc.requested_mode = e1000_fc_rx_pause; hw->fc.pause_time = 0xFFFF; /* override */ if (if_getmtu(ifp) > ETHERMTU) { hw->fc.high_water = 0x3500; @@ -3144,13 +2388,28 @@ em_reset(struct adapter *adapter) else E1000_WRITE_REG(hw, E1000_PBA, 26); break; + case e1000_82575: + case e1000_82576: + /* 8-byte granularity */ + hw->fc.low_water = hw->fc.high_water - 8; + break; + case e1000_82580: + case e1000_i350: + case e1000_i354: + case e1000_i210: + case e1000_i211: + case e1000_vfadapt: + case e1000_vfadapt_i350: + /* 16-byte granularity */ + hw->fc.low_water = hw->fc.high_water - 16; + break; case e1000_ich9lan: case e1000_ich10lan: if (if_getmtu(ifp) > ETHERMTU) { hw->fc.high_water = 0x2800; hw->fc.low_water = hw->fc.high_water - 8; break; - } + } /* else fall thru */ default: if (hw->mac.type == e1000_80003es2lan) @@ -3158,13 +2417,9 @@ em_reset(struct adapter *adapter) break; } - /* I219 needs some special flushing to avoid hangs */ - if (hw->mac.type == e1000_pch_spt) - em_flush_desc_rings(adapter); - /* Issue a global reset */ e1000_reset_hw(hw); - E1000_WRITE_REG(hw, E1000_WUC, 0); + E1000_WRITE_REG(hw, E1000_WUFC, 0); em_disable_aspm(adapter); /* and a re-init */ if (e1000_init_hw(hw) < 0) { @@ -3175,7 +2430,145 @@ em_reset(struct adapter *adapter) E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN); e1000_get_phy_info(hw); e1000_check_for_link(hw); - return; +} + +#define RSSKEYLEN 10 +static void +em_initialize_rss_mapping(struct adapter *adapter) +{ + uint8_t rss_key[4 * RSSKEYLEN]; + uint32_t reta = 0; + struct e1000_hw *hw = 
&adapter->hw; + int i; + + /* + * Configure RSS key + */ + arc4rand(rss_key, sizeof(rss_key), 0); + for (i = 0; i < RSSKEYLEN; ++i) { + uint32_t rssrk = 0; + + rssrk = EM_RSSRK_VAL(rss_key, i); + E1000_WRITE_REG(hw,E1000_RSSRK(i), rssrk); + } + + /* + * Configure RSS redirect table in following fashion: + * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)] + */ + for (i = 0; i < sizeof(reta); ++i) { + uint32_t q; + + q = (i % adapter->rx_num_queues) << 7; + reta |= q << (8 * i); + } + + for (i = 0; i < 32; ++i) + E1000_WRITE_REG(hw, E1000_RETA(i), reta); + + E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q | + E1000_MRQC_RSS_FIELD_IPV4_TCP | + E1000_MRQC_RSS_FIELD_IPV4 | + E1000_MRQC_RSS_FIELD_IPV6_TCP_EX | + E1000_MRQC_RSS_FIELD_IPV6_EX | + E1000_MRQC_RSS_FIELD_IPV6); + +} + +static void +igb_initialize_rss_mapping(struct adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + int i; + int queue_id; + u32 reta; + u32 rss_key[10], mrqc, shift = 0; + + /* XXX? */ + if (adapter->hw.mac.type == e1000_82575) + shift = 6; + + /* + * The redirection table controls which destination + * queue each bucket redirects traffic to. + * Each DWORD represents four queues, with the LSB + * being the first queue in the DWORD. + * + * This just allocates buckets to queues using round-robin + * allocation. + * + * NOTE: It Just Happens to line up with the default + * RSS allocation method. + */ + + /* Warning FM follows */ + reta = 0; + for (i = 0; i < 128; i++) { +#ifdef RSS + queue_id = rss_get_indirection_to_bucket(i); + /* + * If we have more queues than buckets, we'll + * end up mapping buckets to a subset of the + * queues. + * + * If we have more buckets than queues, we'll + * end up instead assigning multiple buckets + * to queues. + * + * Both are suboptimal, but we need to handle + * the case so we don't go out of bounds + * indexing arrays and such. 
+ */ + queue_id = queue_id % adapter->rx_num_queues; +#else + queue_id = (i % adapter->rx_num_queues); +#endif + /* Adjust if required */ + queue_id = queue_id << shift; + + /* + * The low 8 bits are for hash value (n+0); + * The next 8 bits are for hash value (n+1), etc. + */ + reta = reta >> 8; + reta = reta | ( ((uint32_t) queue_id) << 24); + if ((i & 3) == 3) { + E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta); + reta = 0; + } + } + + /* Now fill in hash table */ + + /* + * MRQC: Multiple Receive Queues Command + * Set queuing to RSS control, number depends on the device. + */ + mrqc = E1000_MRQC_ENABLE_RSS_8Q; + +#ifdef RSS + /* XXX ew typecasting */ + rss_getkey((uint8_t *) &rss_key); +#else + arc4rand(&rss_key, sizeof(rss_key), 0); +#endif + for (i = 0; i < 10; i++) + E1000_WRITE_REG_ARRAY(hw, + E1000_RSSRK(0), i, rss_key[i]); + + /* + * Configure the RSS fields to hash upon. + */ + mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 | + E1000_MRQC_RSS_FIELD_IPV4_TCP); + mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 | + E1000_MRQC_RSS_FIELD_IPV6_TCP); + mrqc |=( E1000_MRQC_RSS_FIELD_IPV4_UDP | + E1000_MRQC_RSS_FIELD_IPV6_UDP); + mrqc |=( E1000_MRQC_RSS_FIELD_IPV6_UDP_EX | + E1000_MRQC_RSS_FIELD_IPV6_TCP_EX); + + E1000_WRITE_REG(hw, E1000_MRQC, mrqc); } /********************************************************************* @@ -3184,497 +2577,239 @@ em_reset(struct adapter *adapter) * **********************************************************************/ static int -em_setup_interface(device_t dev, struct adapter *adapter) +em_setup_interface(if_ctx_t ctx) { - if_t ifp; + struct ifnet *ifp = iflib_get_ifp(ctx); + struct adapter *adapter = iflib_get_softc(ctx); + if_softc_ctx_t scctx = adapter->shared; + uint64_t cap = 0; INIT_DEBUGOUT("em_setup_interface: begin"); - ifp = adapter->ifp = if_gethandle(IFT_ETHER); - if (ifp == 0) { - device_printf(dev, "can not allocate ifnet structure\n"); - return (-1); - } - if_initname(ifp, device_get_name(dev), device_get_unit(dev)); - if_setdev(ifp, 
dev); - if_setinitfn(ifp, em_init); - if_setsoftc(ifp, adapter); - if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST); - if_setioctlfn(ifp, em_ioctl); - if_setgetcounterfn(ifp, em_get_counter); - /* TSO parameters */ - ifp->if_hw_tsomax = IP_MAXPACKET; + if_sethwtsomax(ifp, IP_MAXPACKET); /* Take m_pullup(9)'s in em_xmit() w/ TSO into acount. */ - ifp->if_hw_tsomaxsegcount = EM_MAX_SCATTER - 5; - ifp->if_hw_tsomaxsegsize = EM_TSO_SEG_SIZE; - -#ifdef EM_MULTIQUEUE - /* Multiqueue stack interface */ - if_settransmitfn(ifp, em_mq_start); - if_setqflushfn(ifp, em_qflush); -#else - if_setstartfn(ifp, em_start); - if_setsendqlen(ifp, adapter->num_tx_desc - 1); - if_setsendqready(ifp); -#endif + if_sethwtsomaxsegcount(ifp, EM_MAX_SCATTER - 5); + if_sethwtsomaxsegsize(ifp, EM_TSO_SEG_SIZE); - ether_ifattach(ifp, adapter->hw.mac.addr); - - if_setcapabilities(ifp, 0); - if_setcapenable(ifp, 0); + /* Single Queue */ + if (adapter->tx_num_queues == 1) { + if_setsendqlen(ifp, scctx->isc_ntxd[0] - 1); + if_setsendqready(ifp); + } + cap = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO4; + cap |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO | IFCAP_VLAN_MTU; - if_setcapabilitiesbit(ifp, IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | - IFCAP_TSO4, 0); /* * Tell the upper layer(s) we * support full VLAN capability */ if_setifheaderlen(ifp, sizeof(struct ether_vlan_header)); - if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO | - IFCAP_VLAN_MTU, 0); - if_setcapenable(ifp, if_getcapabilities(ifp)); + if_setcapabilitiesbit(ifp, cap, 0); /* - ** Don't turn this on by default, if vlans are - ** created on another pseudo device (eg. lagg) - ** then vlan events are not passed thru, breaking - ** operation, but with HW FILTER off it works. If - ** using vlans directly on the em driver you can - ** enable this and get full hardware tag filtering. - */ + * Don't turn this on by default, if vlans are + * created on another pseudo device (eg. 
lagg) + * then vlan events are not passed thru, breaking + * operation, but with HW FILTER off it works. If + * using vlans directly on the em driver you can + * enable this and get full hardware tag filtering. + */ if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER,0); -#ifdef DEVICE_POLLING - if_setcapabilitiesbit(ifp, IFCAP_POLLING,0); -#endif - /* Enable only WOL MAGIC by default */ if (adapter->wol) { - if_setcapabilitiesbit(ifp, IFCAP_WOL, 0); - if_setcapenablebit(ifp, IFCAP_WOL_MAGIC, 0); + if_setcapenablebit(ifp, IFCAP_WOL_MAGIC, + IFCAP_WOL_MCAST| IFCAP_WOL_UCAST); + } else { + if_setcapenablebit(ifp, 0, IFCAP_WOL_MAGIC | + IFCAP_WOL_MCAST| IFCAP_WOL_UCAST); } - + /* * Specify the media types supported by this adapter and register * callbacks to update media and link information */ - ifmedia_init(&adapter->media, IFM_IMASK, - em_media_change, em_media_status); if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) { u_char fiber_type = IFM_1000_SX; /* default type */ - ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX, - 0, NULL); - ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL); + if (adapter->hw.mac.type == e1000_82545) + fiber_type = IFM_1000_LX; + ifmedia_add(adapter->media, IFM_ETHER | fiber_type | IFM_FDX, 0, NULL); + ifmedia_add(adapter->media, IFM_ETHER | fiber_type, 0, NULL); } else { - ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL); - ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX, - 0, NULL); - ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX, - 0, NULL); - ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX, - 0, NULL); + ifmedia_add(adapter->media, IFM_ETHER | IFM_10_T, 0, NULL); + ifmedia_add(adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX, 0, NULL); + ifmedia_add(adapter->media, IFM_ETHER | IFM_100_TX, 0, NULL); + ifmedia_add(adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX, 0, NULL); if (adapter->hw.phy.type 
!= e1000_phy_ife) { - ifmedia_add(&adapter->media, - IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); - ifmedia_add(&adapter->media, - IFM_ETHER | IFM_1000_T, 0, NULL); + ifmedia_add(adapter->media, IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); + ifmedia_add(adapter->media, IFM_ETHER | IFM_1000_T, 0, NULL); } } - ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); - ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO); + ifmedia_add(adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); + ifmedia_set(adapter->media, IFM_ETHER | IFM_AUTO); return (0); } - -/* - * Manage DMA'able memory. - */ -static void -em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) -{ - if (error) - return; - *(bus_addr_t *) arg = segs[0].ds_addr; -} - static int -em_dma_malloc(struct adapter *adapter, bus_size_t size, - struct em_dma_alloc *dma, int mapflags) +em_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets) { - int error; + struct adapter *adapter = iflib_get_softc(ctx); + if_softc_ctx_t scctx = adapter->shared; + int error = E1000_SUCCESS; + struct em_tx_queue *que; + int i, j; - error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */ - EM_DBA_ALIGN, 0, /* alignment, bounds */ - BUS_SPACE_MAXADDR, /* lowaddr */ - BUS_SPACE_MAXADDR, /* highaddr */ - NULL, NULL, /* filter, filterarg */ - size, /* maxsize */ - 1, /* nsegments */ - size, /* maxsegsize */ - 0, /* flags */ - NULL, /* lockfunc */ - NULL, /* lockarg */ - &dma->dma_tag); - if (error) { - device_printf(adapter->dev, - "%s: bus_dma_tag_create failed: %d\n", - __func__, error); - goto fail_0; - } - - error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr, - BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map); - if (error) { - device_printf(adapter->dev, - "%s: bus_dmamem_alloc(%ju) failed: %d\n", - __func__, (uintmax_t)size, error); - goto fail_2; - } + MPASS(adapter->tx_num_queues > 0); + MPASS(adapter->tx_num_queues == ntxqsets); - dma->dma_paddr = 0; - 
error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr, - size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT); - if (error || dma->dma_paddr == 0) { - device_printf(adapter->dev, - "%s: bus_dmamap_load failed: %d\n", - __func__, error); - goto fail_3; - } - - return (0); - -fail_3: - bus_dmamap_unload(dma->dma_tag, dma->dma_map); -fail_2: - bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); - bus_dma_tag_destroy(dma->dma_tag); -fail_0: - dma->dma_tag = NULL; - - return (error); -} - -static void -em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma) -{ - if (dma->dma_tag == NULL) - return; - if (dma->dma_paddr != 0) { - bus_dmamap_sync(dma->dma_tag, dma->dma_map, - BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); - bus_dmamap_unload(dma->dma_tag, dma->dma_map); - dma->dma_paddr = 0; - } - if (dma->dma_vaddr != NULL) { - bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); - dma->dma_vaddr = NULL; - } - bus_dma_tag_destroy(dma->dma_tag); - dma->dma_tag = NULL; -} - - -/********************************************************************* - * - * Allocate memory for the transmit and receive rings, and then - * the descriptors associated with each, called only once at attach. 
- * - **********************************************************************/ -static int -em_allocate_queues(struct adapter *adapter) -{ - device_t dev = adapter->dev; - struct tx_ring *txr = NULL; - struct rx_ring *rxr = NULL; - int rsize, tsize, error = E1000_SUCCESS; - int txconf = 0, rxconf = 0; - - - /* Allocate the TX ring struct memory */ - if (!(adapter->tx_rings = - (struct tx_ring *) malloc(sizeof(struct tx_ring) * - adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { - device_printf(dev, "Unable to allocate TX ring memory\n"); - error = ENOMEM; - goto fail; - } - - /* Now allocate the RX */ - if (!(adapter->rx_rings = - (struct rx_ring *) malloc(sizeof(struct rx_ring) * - adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { - device_printf(dev, "Unable to allocate RX ring memory\n"); - error = ENOMEM; - goto rx_fail; + /* First allocate the top level queue structs */ + if (!(adapter->tx_queues = + (struct em_tx_queue *) malloc(sizeof(struct em_tx_queue) * + adapter->tx_num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { + device_printf(iflib_get_dev(ctx), "Unable to allocate queue memory\n"); + return(ENOMEM); } - tsize = roundup2(adapter->num_tx_desc * - sizeof(struct e1000_tx_desc), EM_DBA_ALIGN); - /* - * Now set up the TX queues, txconf is needed to handle the - * possibility that things fail midcourse and we need to - * undo memory gracefully - */ - for (int i = 0; i < adapter->num_queues; i++, txconf++) { + for (i = 0, que = adapter->tx_queues; i < adapter->tx_num_queues; i++, que++) { /* Set up some basics */ - txr = &adapter->tx_rings[i]; - txr->adapter = adapter; - txr->me = i; - - /* Initialize the TX lock */ - snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)", - device_get_nameunit(dev), txr->me); - mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF); - if (em_dma_malloc(adapter, tsize, - &txr->txdma, BUS_DMA_NOWAIT)) { - device_printf(dev, - "Unable to allocate TX Descriptor memory\n"); - error = ENOMEM; - goto err_tx_desc; - } - 
txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr; - bzero((void *)txr->tx_base, tsize); - - if (em_allocate_transmit_buffers(txr)) { - device_printf(dev, - "Critical Failure setting up transmit buffers\n"); - error = ENOMEM; - goto err_tx_desc; - } -#if __FreeBSD_version >= 800000 - /* Allocate a buf ring */ - txr->br = buf_ring_alloc(4096, M_DEVBUF, - M_WAITOK, &txr->tx_mtx); -#endif - } + struct tx_ring *txr = &que->txr; + txr->adapter = que->adapter = adapter; + que->me = txr->me = i; - /* - * Next the RX queues... - */ - rsize = roundup2(adapter->num_rx_desc * - sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN); - for (int i = 0; i < adapter->num_queues; i++, rxconf++) { - rxr = &adapter->rx_rings[i]; - rxr->adapter = adapter; - rxr->me = i; - - /* Initialize the RX lock */ - snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)", - device_get_nameunit(dev), txr->me); - mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF); - - if (em_dma_malloc(adapter, rsize, - &rxr->rxdma, BUS_DMA_NOWAIT)) { - device_printf(dev, - "Unable to allocate RxDescriptor memory\n"); + /* Allocate report status array */ + if (!(txr->tx_rsq = (qidx_t *) malloc(sizeof(qidx_t) * scctx->isc_ntxd[0], M_DEVBUF, M_NOWAIT | M_ZERO))) { + device_printf(iflib_get_dev(ctx), "failed to allocate rs_idxs memory\n"); error = ENOMEM; - goto err_rx_desc; - } - rxr->rx_base = (union e1000_rx_desc_extended *)rxr->rxdma.dma_vaddr; - bzero((void *)rxr->rx_base, rsize); - - /* Allocate receive buffers for the ring*/ - if (em_allocate_receive_buffers(rxr)) { - device_printf(dev, - "Critical Failure setting up receive buffers\n"); - error = ENOMEM; - goto err_rx_desc; + goto fail; } + for (j = 0; j < scctx->isc_ntxd[0]; j++) + txr->tx_rsq[j] = QIDX_INVALID; + /* get the virtual and physical address of the hardware queues */ + txr->tx_base = (struct e1000_tx_desc *)vaddrs[i*ntxqs]; + txr->tx_paddr = paddrs[i*ntxqs]; } - + + device_printf(iflib_get_dev(ctx), "allocated for %d tx_queues\n", 
adapter->tx_num_queues); return (0); - -err_rx_desc: - for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--) - em_dma_free(adapter, &rxr->rxdma); -err_tx_desc: - for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--) - em_dma_free(adapter, &txr->txdma); - free(adapter->rx_rings, M_DEVBUF); -rx_fail: -#if __FreeBSD_version >= 800000 - buf_ring_free(txr->br, M_DEVBUF); -#endif - free(adapter->tx_rings, M_DEVBUF); fail: + em_if_queues_free(ctx); return (error); } - -/********************************************************************* - * - * Allocate memory for tx_buffer structures. The tx_buffer stores all - * the information needed to transmit a packet on the wire. This is - * called only once at attach, setup is done every reset. - * - **********************************************************************/ static int -em_allocate_transmit_buffers(struct tx_ring *txr) +em_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nrxqs, int nrxqsets) { - struct adapter *adapter = txr->adapter; - device_t dev = adapter->dev; - struct em_txbuffer *txbuf; - int error, i; + struct adapter *adapter = iflib_get_softc(ctx); + int error = E1000_SUCCESS; + struct em_rx_queue *que; + int i; - /* - * Setup DMA descriptor areas. 
- */ - if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), - 1, 0, /* alignment, bounds */ - BUS_SPACE_MAXADDR, /* lowaddr */ - BUS_SPACE_MAXADDR, /* highaddr */ - NULL, NULL, /* filter, filterarg */ - EM_TSO_SIZE, /* maxsize */ - EM_MAX_SCATTER, /* nsegments */ - PAGE_SIZE, /* maxsegsize */ - 0, /* flags */ - NULL, /* lockfunc */ - NULL, /* lockfuncarg */ - &txr->txtag))) { - device_printf(dev,"Unable to allocate TX DMA tag\n"); - goto fail; - } + MPASS(adapter->rx_num_queues > 0); + MPASS(adapter->rx_num_queues == nrxqsets); - if (!(txr->tx_buffers = - (struct em_txbuffer *) malloc(sizeof(struct em_txbuffer) * - adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) { - device_printf(dev, "Unable to allocate tx_buffer memory\n"); + /* First allocate the top level queue structs */ + if (!(adapter->rx_queues = + (struct em_rx_queue *) malloc(sizeof(struct em_rx_queue) * + adapter->rx_num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { + device_printf(iflib_get_dev(ctx), "Unable to allocate queue memory\n"); error = ENOMEM; - goto fail; + goto fail; } - /* Create the descriptor buffer dma maps */ - txbuf = txr->tx_buffers; - for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) { - error = bus_dmamap_create(txr->txtag, 0, &txbuf->map); - if (error != 0) { - device_printf(dev, "Unable to create TX DMA map\n"); - goto fail; - } + for (i = 0, que = adapter->rx_queues; i < nrxqsets; i++, que++) { + /* Set up some basics */ + struct rx_ring *rxr = &que->rxr; + rxr->adapter = que->adapter = adapter; + rxr->que = que; + que->me = rxr->me = i; + + /* get the virtual and physical address of the hardware queues */ + rxr->rx_base = (union e1000_rx_desc_extended *)vaddrs[i*nrxqs]; + rxr->rx_paddr = paddrs[i*nrxqs]; } - return 0; + device_printf(iflib_get_dev(ctx), "allocated for %d rx_queues\n", adapter->rx_num_queues); + + return (0); fail: - /* We free all, it handles case where we are in the middle */ - em_free_transmit_structures(adapter); + em_if_queues_free(ctx); return (error); } 
-/********************************************************************* - * - * Initialize a transmit ring. - * - **********************************************************************/ static void -em_setup_transmit_ring(struct tx_ring *txr) +em_if_queues_free(if_ctx_t ctx) { - struct adapter *adapter = txr->adapter; - struct em_txbuffer *txbuf; - int i; -#ifdef DEV_NETMAP - struct netmap_slot *slot; - struct netmap_adapter *na = netmap_getna(adapter->ifp); -#endif /* DEV_NETMAP */ - - /* Clear the old descriptor contents */ - EM_TX_LOCK(txr); -#ifdef DEV_NETMAP - slot = netmap_reset(na, NR_TX, txr->me, 0); -#endif /* DEV_NETMAP */ - - bzero((void *)txr->tx_base, - (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc); - /* Reset indices */ - txr->next_avail_desc = 0; - txr->next_to_clean = 0; - - /* Free any existing tx buffers. */ - txbuf = txr->tx_buffers; - for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) { - if (txbuf->m_head != NULL) { - bus_dmamap_sync(txr->txtag, txbuf->map, - BUS_DMASYNC_POSTWRITE); - bus_dmamap_unload(txr->txtag, txbuf->map); - m_freem(txbuf->m_head); - txbuf->m_head = NULL; - } -#ifdef DEV_NETMAP - if (slot) { - int si = netmap_idx_n2k(&na->tx_rings[txr->me], i); - uint64_t paddr; - void *addr; - - addr = PNMB(na, slot + si, &paddr); - txr->tx_base[i].buffer_addr = htole64(paddr); - /* reload the map for netmap mode */ - netmap_load_map(na, txr->txtag, txbuf->map, addr); - } -#endif /* DEV_NETMAP */ + struct adapter *adapter = iflib_get_softc(ctx); + struct em_tx_queue *tx_que = adapter->tx_queues; + struct em_rx_queue *rx_que = adapter->rx_queues; + + if (tx_que != NULL) { + for (int i = 0; i < adapter->tx_num_queues; i++, tx_que++) { + struct tx_ring *txr = &tx_que->txr; + if (txr->tx_rsq == NULL) + break; - /* clear the watch index */ - txbuf->next_eop = -1; - } + free(txr->tx_rsq, M_DEVBUF); + txr->tx_rsq = NULL; + } + free(adapter->tx_queues, M_DEVBUF); + adapter->tx_queues = NULL; + } - /* Set number of descriptors available 
*/ - txr->tx_avail = adapter->num_tx_desc; - txr->busy = EM_TX_IDLE; + if (rx_que != NULL) { + free(adapter->rx_queues, M_DEVBUF); + adapter->rx_queues = NULL; + } - /* Clear checksum offload context. */ - txr->last_hw_offload = 0; - txr->last_hw_ipcss = 0; - txr->last_hw_ipcso = 0; - txr->last_hw_tucss = 0; - txr->last_hw_tucso = 0; + em_release_hw_control(adapter); - bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - EM_TX_UNLOCK(txr); + if (adapter->mta != NULL) { + free(adapter->mta, M_DEVBUF); + } } /********************************************************************* * - * Initialize all transmit rings. + * Enable transmit unit. * **********************************************************************/ static void -em_setup_transmit_structures(struct adapter *adapter) +em_initialize_transmit_unit(if_ctx_t ctx) { - struct tx_ring *txr = adapter->tx_rings; + struct adapter *adapter = iflib_get_softc(ctx); + if_softc_ctx_t scctx = adapter->shared; + struct em_tx_queue *que; + struct tx_ring *txr; + struct e1000_hw *hw = &adapter->hw; + u32 tctl, txdctl = 0, tarc, tipg = 0; - for (int i = 0; i < adapter->num_queues; i++, txr++) - em_setup_transmit_ring(txr); + INIT_DEBUGOUT("em_initialize_transmit_unit: begin"); - return; -} + for (int i = 0; i < adapter->tx_num_queues; i++, txr++) { + u64 bus_addr; + caddr_t offp, endp; -/********************************************************************* - * - * Enable transmit unit. - * - **********************************************************************/ -static void -em_initialize_transmit_unit(struct adapter *adapter) -{ - struct tx_ring *txr = adapter->tx_rings; - struct e1000_hw *hw = &adapter->hw; - u32 tctl, txdctl = 0, tarc, tipg = 0; + que = &adapter->tx_queues[i]; + txr = &que->txr; + bus_addr = txr->tx_paddr; - INIT_DEBUGOUT("em_initialize_transmit_unit: begin"); + /* Clear checksum offload context. 
*/ + offp = (caddr_t)&txr->csum_flags; + endp = (caddr_t)(txr + 1); + bzero(offp, endp - offp); - for (int i = 0; i < adapter->num_queues; i++, txr++) { - u64 bus_addr = txr->txdma.dma_paddr; /* Base and Len of TX Ring */ E1000_WRITE_REG(hw, E1000_TDLEN(i), - adapter->num_tx_desc * sizeof(struct e1000_tx_desc)); + scctx->isc_ntxd[0] * sizeof(struct e1000_tx_desc)); E1000_WRITE_REG(hw, E1000_TDBAH(i), - (u32)(bus_addr >> 32)); + (u32)(bus_addr >> 32)); E1000_WRITE_REG(hw, E1000_TDBAL(i), - (u32)bus_addr); + (u32)bus_addr); /* Init the HEAD/TAIL indices */ E1000_WRITE_REG(hw, E1000_TDT(i), 0); E1000_WRITE_REG(hw, E1000_TDH(i), 0); @@ -3683,16 +2818,15 @@ em_initialize_transmit_unit(struct adapter *adapter) E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)), E1000_READ_REG(&adapter->hw, E1000_TDLEN(i))); - txr->busy = EM_TX_IDLE; txdctl = 0; /* clear txdctl */ - txdctl |= 0x1f; /* PTHRESH */ - txdctl |= 1 << 8; /* HTHRESH */ - txdctl |= 1 << 16;/* WTHRESH */ + txdctl |= 0x1f; /* PTHRESH */ + txdctl |= 1 << 8; /* HTHRESH */ + txdctl |= 1 << 16;/* WTHRESH */ txdctl |= 1 << 22; /* Reserved bit 22 must always be 1 */ txdctl |= E1000_TXDCTL_GRAN; - txdctl |= 1 << 25; /* LWTHRESH */ + txdctl |= 1 << 25; /* LWTHRESH */ - E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl); + E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl); } /* Set the default values for the Tx Inter Packet Gap timer */ @@ -3702,6 +2836,11 @@ em_initialize_transmit_unit(struct adapter *adapter) tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT; break; + case e1000_82542: + tipg = DEFAULT_82542_TIPG_IPGT; + tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT; + tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT; + break; default: if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || (adapter->hw.phy.media_type == @@ -3736,7 +2875,7 @@ em_initialize_transmit_unit(struct adapter *adapter) } else if (adapter->hw.mac.type == e1000_82574) { tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0)); 
tarc |= TARC_ERRATA_BIT; - if ( adapter->num_queues > 1) { + if ( adapter->tx_num_queues > 1) { tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX); E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc); @@ -3744,7 +2883,6 @@ em_initialize_transmit_unit(struct adapter *adapter) E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); } - adapter->txd_cmd = E1000_TXD_CMD_IFCS; if (adapter->tx_int_delay.value > 0) adapter->txd_cmd |= E1000_TXD_CMD_IDE; @@ -3771,780 +2909,6 @@ em_initialize_transmit_unit(struct adapter *adapter) } } - -/********************************************************************* - * - * Free all transmit rings. - * - **********************************************************************/ -static void -em_free_transmit_structures(struct adapter *adapter) -{ - struct tx_ring *txr = adapter->tx_rings; - - for (int i = 0; i < adapter->num_queues; i++, txr++) { - EM_TX_LOCK(txr); - em_free_transmit_buffers(txr); - em_dma_free(adapter, &txr->txdma); - EM_TX_UNLOCK(txr); - EM_TX_LOCK_DESTROY(txr); - } - - free(adapter->tx_rings, M_DEVBUF); -} - -/********************************************************************* - * - * Free transmit ring related data structures. 
- * - **********************************************************************/ -static void -em_free_transmit_buffers(struct tx_ring *txr) -{ - struct adapter *adapter = txr->adapter; - struct em_txbuffer *txbuf; - - INIT_DEBUGOUT("free_transmit_ring: begin"); - - if (txr->tx_buffers == NULL) - return; - - for (int i = 0; i < adapter->num_tx_desc; i++) { - txbuf = &txr->tx_buffers[i]; - if (txbuf->m_head != NULL) { - bus_dmamap_sync(txr->txtag, txbuf->map, - BUS_DMASYNC_POSTWRITE); - bus_dmamap_unload(txr->txtag, - txbuf->map); - m_freem(txbuf->m_head); - txbuf->m_head = NULL; - if (txbuf->map != NULL) { - bus_dmamap_destroy(txr->txtag, - txbuf->map); - txbuf->map = NULL; - } - } else if (txbuf->map != NULL) { - bus_dmamap_unload(txr->txtag, - txbuf->map); - bus_dmamap_destroy(txr->txtag, - txbuf->map); - txbuf->map = NULL; - } - } -#if __FreeBSD_version >= 800000 - if (txr->br != NULL) - buf_ring_free(txr->br, M_DEVBUF); -#endif - if (txr->tx_buffers != NULL) { - free(txr->tx_buffers, M_DEVBUF); - txr->tx_buffers = NULL; - } - if (txr->txtag != NULL) { - bus_dma_tag_destroy(txr->txtag); - txr->txtag = NULL; - } - return; -} - - -/********************************************************************* - * The offload context is protocol specific (TCP/UDP) and thus - * only needs to be set when the protocol changes. The occasion - * of a context change can be a performance detriment, and - * might be better just disabled. The reason arises in the way - * in which the controller supports pipelined requests from the - * Tx data DMA. Up to four requests can be pipelined, and they may - * belong to the same packet or to multiple packets. However all - * requests for one packet are issued before a request is issued - * for a subsequent packet and if a request for the next packet - * requires a context change, that request will be stalled - * until the previous request completes. 
This means setting up - * a new context effectively disables pipelined Tx data DMA which - * in turn greatly slow down performance to send small sized - * frames. - **********************************************************************/ -static void -em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off, - struct ip *ip, u32 *txd_upper, u32 *txd_lower) -{ - struct adapter *adapter = txr->adapter; - struct e1000_context_desc *TXD = NULL; - struct em_txbuffer *tx_buffer; - int cur, hdr_len; - u32 cmd = 0; - u16 offload = 0; - u8 ipcso, ipcss, tucso, tucss; - - ipcss = ipcso = tucss = tucso = 0; - hdr_len = ip_off + (ip->ip_hl << 2); - cur = txr->next_avail_desc; - - /* Setup of IP header checksum. */ - if (mp->m_pkthdr.csum_flags & CSUM_IP) { - *txd_upper |= E1000_TXD_POPTS_IXSM << 8; - offload |= CSUM_IP; - ipcss = ip_off; - ipcso = ip_off + offsetof(struct ip, ip_sum); - /* - * Start offset for header checksum calculation. - * End offset for header checksum calculation. - * Offset of place to put the checksum. - */ - TXD = (struct e1000_context_desc *)&txr->tx_base[cur]; - TXD->lower_setup.ip_fields.ipcss = ipcss; - TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len); - TXD->lower_setup.ip_fields.ipcso = ipcso; - cmd |= E1000_TXD_CMD_IP; - } - - if (mp->m_pkthdr.csum_flags & CSUM_TCP) { - *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D; - *txd_upper |= E1000_TXD_POPTS_TXSM << 8; - offload |= CSUM_TCP; - tucss = hdr_len; - tucso = hdr_len + offsetof(struct tcphdr, th_sum); - /* - * The 82574L can only remember the *last* context used - * regardless of queue that it was use for. We cannot reuse - * contexts on this hardware platform and must generate a new - * context every time. 82574L hardware spec, section 7.2.6, - * second note. - */ - if (adapter->num_queues < 2) { - /* - * Setting up new checksum offload context for every - * frames takes a lot of processing time for hardware. 
- * This also reduces performance a lot for small sized - * frames so avoid it if driver can use previously - * configured checksum offload context. - */ - if (txr->last_hw_offload == offload) { - if (offload & CSUM_IP) { - if (txr->last_hw_ipcss == ipcss && - txr->last_hw_ipcso == ipcso && - txr->last_hw_tucss == tucss && - txr->last_hw_tucso == tucso) - return; - } else { - if (txr->last_hw_tucss == tucss && - txr->last_hw_tucso == tucso) - return; - } - } - txr->last_hw_offload = offload; - txr->last_hw_tucss = tucss; - txr->last_hw_tucso = tucso; - } - /* - * Start offset for payload checksum calculation. - * End offset for payload checksum calculation. - * Offset of place to put the checksum. - */ - TXD = (struct e1000_context_desc *)&txr->tx_base[cur]; - TXD->upper_setup.tcp_fields.tucss = hdr_len; - TXD->upper_setup.tcp_fields.tucse = htole16(0); - TXD->upper_setup.tcp_fields.tucso = tucso; - cmd |= E1000_TXD_CMD_TCP; - } else if (mp->m_pkthdr.csum_flags & CSUM_UDP) { - *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D; - *txd_upper |= E1000_TXD_POPTS_TXSM << 8; - tucss = hdr_len; - tucso = hdr_len + offsetof(struct udphdr, uh_sum); - /* - * The 82574L can only remember the *last* context used - * regardless of queue that it was use for. We cannot reuse - * contexts on this hardware platform and must generate a new - * context every time. 82574L hardware spec, section 7.2.6, - * second note. - */ - if (adapter->num_queues < 2) { - /* - * Setting up new checksum offload context for every - * frames takes a lot of processing time for hardware. - * This also reduces performance a lot for small sized - * frames so avoid it if driver can use previously - * configured checksum offload context. 
- */ - if (txr->last_hw_offload == offload) { - if (offload & CSUM_IP) { - if (txr->last_hw_ipcss == ipcss && - txr->last_hw_ipcso == ipcso && - txr->last_hw_tucss == tucss && - txr->last_hw_tucso == tucso) - return; - } else { - if (txr->last_hw_tucss == tucss && - txr->last_hw_tucso == tucso) - return; - } - } - txr->last_hw_offload = offload; - txr->last_hw_tucss = tucss; - txr->last_hw_tucso = tucso; - } - /* - * Start offset for header checksum calculation. - * End offset for header checksum calculation. - * Offset of place to put the checksum. - */ - TXD = (struct e1000_context_desc *)&txr->tx_base[cur]; - TXD->upper_setup.tcp_fields.tucss = tucss; - TXD->upper_setup.tcp_fields.tucse = htole16(0); - TXD->upper_setup.tcp_fields.tucso = tucso; - } - - if (offload & CSUM_IP) { - txr->last_hw_ipcss = ipcss; - txr->last_hw_ipcso = ipcso; - } - - TXD->tcp_seg_setup.data = htole32(0); - TXD->cmd_and_length = - htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd); - tx_buffer = &txr->tx_buffers[cur]; - tx_buffer->m_head = NULL; - tx_buffer->next_eop = -1; - - if (++cur == adapter->num_tx_desc) - cur = 0; - - txr->tx_avail--; - txr->next_avail_desc = cur; -} - - -/********************************************************************** - * - * Setup work for hardware segmentation offload (TSO) - * - **********************************************************************/ -static void -em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off, - struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower) -{ - struct adapter *adapter = txr->adapter; - struct e1000_context_desc *TXD; - struct em_txbuffer *tx_buffer; - int cur, hdr_len; - - /* - * In theory we can use the same TSO context if and only if - * frame is the same type(IP/TCP) and the same MSS. However - * checking whether a frame has the same IP/TCP structure is - * hard thing so just ignore that and always restablish a - * new TSO context. 
- */ - hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2); - *txd_lower = (E1000_TXD_CMD_DEXT | /* Extended descr type */ - E1000_TXD_DTYP_D | /* Data descr type */ - E1000_TXD_CMD_TSE); /* Do TSE on this packet */ - - /* IP and/or TCP header checksum calculation and insertion. */ - *txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8; - - cur = txr->next_avail_desc; - tx_buffer = &txr->tx_buffers[cur]; - TXD = (struct e1000_context_desc *) &txr->tx_base[cur]; - - /* - * Start offset for header checksum calculation. - * End offset for header checksum calculation. - * Offset of place put the checksum. - */ - TXD->lower_setup.ip_fields.ipcss = ip_off; - TXD->lower_setup.ip_fields.ipcse = - htole16(ip_off + (ip->ip_hl << 2) - 1); - TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum); - /* - * Start offset for payload checksum calculation. - * End offset for payload checksum calculation. - * Offset of place to put the checksum. - */ - TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2); - TXD->upper_setup.tcp_fields.tucse = 0; - TXD->upper_setup.tcp_fields.tucso = - ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum); - /* - * Payload size per packet w/o any headers. - * Length of all headers up to payload. - */ - TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz); - TXD->tcp_seg_setup.fields.hdr_len = hdr_len; - - TXD->cmd_and_length = htole32(adapter->txd_cmd | - E1000_TXD_CMD_DEXT | /* Extended descr */ - E1000_TXD_CMD_TSE | /* TSE context */ - E1000_TXD_CMD_IP | /* Do IP csum */ - E1000_TXD_CMD_TCP | /* Do TCP checksum */ - (mp->m_pkthdr.len - (hdr_len))); /* Total len */ - - tx_buffer->m_head = NULL; - tx_buffer->next_eop = -1; - - if (++cur == adapter->num_tx_desc) - cur = 0; - - txr->tx_avail--; - txr->next_avail_desc = cur; - txr->tx_tso = TRUE; -} - - -/********************************************************************** - * - * Examine each tx_buffer in the used queue. 
If the hardware is done - * processing the packet then free associated resources. The - * tx_buffer is put back on the free queue. - * - **********************************************************************/ -static void -em_txeof(struct tx_ring *txr) -{ - struct adapter *adapter = txr->adapter; - int first, last, done, processed; - struct em_txbuffer *tx_buffer; - struct e1000_tx_desc *tx_desc, *eop_desc; - if_t ifp = adapter->ifp; - - EM_TX_LOCK_ASSERT(txr); -#ifdef DEV_NETMAP - if (netmap_tx_irq(ifp, txr->me)) - return; -#endif /* DEV_NETMAP */ - - /* No work, make sure hang detection is disabled */ - if (txr->tx_avail == adapter->num_tx_desc) { - txr->busy = EM_TX_IDLE; - return; - } - - processed = 0; - first = txr->next_to_clean; - tx_desc = &txr->tx_base[first]; - tx_buffer = &txr->tx_buffers[first]; - last = tx_buffer->next_eop; - eop_desc = &txr->tx_base[last]; - - /* - * What this does is get the index of the - * first descriptor AFTER the EOP of the - * first packet, that way we can do the - * simple comparison on the inner while loop. 
- */ - if (++last == adapter->num_tx_desc) - last = 0; - done = last; - - bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, - BUS_DMASYNC_POSTREAD); - - while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) { - /* We clean the range of the packet */ - while (first != done) { - tx_desc->upper.data = 0; - tx_desc->lower.data = 0; - tx_desc->buffer_addr = 0; - ++txr->tx_avail; - ++processed; - - if (tx_buffer->m_head) { - bus_dmamap_sync(txr->txtag, - tx_buffer->map, - BUS_DMASYNC_POSTWRITE); - bus_dmamap_unload(txr->txtag, - tx_buffer->map); - m_freem(tx_buffer->m_head); - tx_buffer->m_head = NULL; - } - tx_buffer->next_eop = -1; - - if (++first == adapter->num_tx_desc) - first = 0; - - tx_buffer = &txr->tx_buffers[first]; - tx_desc = &txr->tx_base[first]; - } - if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); - /* See if we can continue to the next packet */ - last = tx_buffer->next_eop; - if (last != -1) { - eop_desc = &txr->tx_base[last]; - /* Get new done point */ - if (++last == adapter->num_tx_desc) last = 0; - done = last; - } else - break; - } - bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - - txr->next_to_clean = first; - - /* - ** Hang detection: we know there's work outstanding - ** or the entry return would have been taken, so no - ** descriptor processed here indicates a potential hang. - ** The local timer will examine this and do a reset if needed. - */ - if (processed == 0) { - if (txr->busy != EM_TX_HUNG) - ++txr->busy; - } else /* At least one descriptor was cleaned */ - txr->busy = EM_TX_BUSY; /* note this clears HUNG */ - - /* - * If we have a minimum free, clear IFF_DRV_OACTIVE - * to tell the stack that it is OK to send packets. - * Notice that all writes of OACTIVE happen under the - * TX lock which, with a single queue, guarantees - * sanity. 
- */ - if (txr->tx_avail >= EM_MAX_SCATTER) { - if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE); - } - - /* Disable hang detection if all clean */ - if (txr->tx_avail == adapter->num_tx_desc) - txr->busy = EM_TX_IDLE; -} - -/********************************************************************* - * - * Refresh RX descriptor mbufs from system mbuf buffer pool. - * - **********************************************************************/ -static void -em_refresh_mbufs(struct rx_ring *rxr, int limit) -{ - struct adapter *adapter = rxr->adapter; - struct mbuf *m; - bus_dma_segment_t segs; - struct em_rxbuffer *rxbuf; - int i, j, error, nsegs; - bool cleaned = FALSE; - - i = j = rxr->next_to_refresh; - /* - ** Get one descriptor beyond - ** our work mark to control - ** the loop. - */ - if (++j == adapter->num_rx_desc) - j = 0; - - while (j != limit) { - rxbuf = &rxr->rx_buffers[i]; - if (rxbuf->m_head == NULL) { - m = m_getjcl(M_NOWAIT, MT_DATA, - M_PKTHDR, adapter->rx_mbuf_sz); - /* - ** If we have a temporary resource shortage - ** that causes a failure, just abort refresh - ** for now, we will return to this point when - ** reinvoked from em_rxeof. 
- */ - if (m == NULL) - goto update; - } else - m = rxbuf->m_head; - - m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz; - m->m_flags |= M_PKTHDR; - m->m_data = m->m_ext.ext_buf; - - /* Use bus_dma machinery to setup the memory mapping */ - error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map, - m, &segs, &nsegs, BUS_DMA_NOWAIT); - if (error != 0) { - printf("Refresh mbufs: hdr dmamap load" - " failure - %d\n", error); - m_free(m); - rxbuf->m_head = NULL; - goto update; - } - rxbuf->m_head = m; - rxbuf->paddr = segs.ds_addr; - bus_dmamap_sync(rxr->rxtag, - rxbuf->map, BUS_DMASYNC_PREREAD); - em_setup_rxdesc(&rxr->rx_base[i], rxbuf); - cleaned = TRUE; - - i = j; /* Next is precalulated for us */ - rxr->next_to_refresh = i; - /* Calculate next controlling index */ - if (++j == adapter->num_rx_desc) - j = 0; - } -update: - /* - ** Update the tail pointer only if, - ** and as far as we have refreshed. - */ - if (cleaned) - E1000_WRITE_REG(&adapter->hw, - E1000_RDT(rxr->me), rxr->next_to_refresh); - - return; -} - - -/********************************************************************* - * - * Allocate memory for rx_buffer structures. Since we use one - * rx_buffer per received packet, the maximum number of rx_buffer's - * that we'll need is equal to the number of receive descriptors - * that we've allocated. 
- * - **********************************************************************/ -static int -em_allocate_receive_buffers(struct rx_ring *rxr) -{ - struct adapter *adapter = rxr->adapter; - device_t dev = adapter->dev; - struct em_rxbuffer *rxbuf; - int error; - - rxr->rx_buffers = malloc(sizeof(struct em_rxbuffer) * - adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO); - if (rxr->rx_buffers == NULL) { - device_printf(dev, "Unable to allocate rx_buffer memory\n"); - return (ENOMEM); - } - - error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ - 1, 0, /* alignment, bounds */ - BUS_SPACE_MAXADDR, /* lowaddr */ - BUS_SPACE_MAXADDR, /* highaddr */ - NULL, NULL, /* filter, filterarg */ - MJUM9BYTES, /* maxsize */ - 1, /* nsegments */ - MJUM9BYTES, /* maxsegsize */ - 0, /* flags */ - NULL, /* lockfunc */ - NULL, /* lockarg */ - &rxr->rxtag); - if (error) { - device_printf(dev, "%s: bus_dma_tag_create failed %d\n", - __func__, error); - goto fail; - } - - rxbuf = rxr->rx_buffers; - for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) { - rxbuf = &rxr->rx_buffers[i]; - error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map); - if (error) { - device_printf(dev, "%s: bus_dmamap_create failed: %d\n", - __func__, error); - goto fail; - } - } - - return (0); - -fail: - em_free_receive_structures(adapter); - return (error); -} - - -/********************************************************************* - * - * Initialize a receive ring and its buffers. 
- * - **********************************************************************/ -static int -em_setup_receive_ring(struct rx_ring *rxr) -{ - struct adapter *adapter = rxr->adapter; - struct em_rxbuffer *rxbuf; - bus_dma_segment_t seg[1]; - int rsize, nsegs, error = 0; -#ifdef DEV_NETMAP - struct netmap_slot *slot; - struct netmap_adapter *na = netmap_getna(adapter->ifp); -#endif - - - /* Clear the ring contents */ - EM_RX_LOCK(rxr); - rsize = roundup2(adapter->num_rx_desc * - sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN); - bzero((void *)rxr->rx_base, rsize); -#ifdef DEV_NETMAP - slot = netmap_reset(na, NR_RX, rxr->me, 0); -#endif - - /* - ** Free current RX buffer structs and their mbufs - */ - for (int i = 0; i < adapter->num_rx_desc; i++) { - rxbuf = &rxr->rx_buffers[i]; - if (rxbuf->m_head != NULL) { - bus_dmamap_sync(rxr->rxtag, rxbuf->map, - BUS_DMASYNC_POSTREAD); - bus_dmamap_unload(rxr->rxtag, rxbuf->map); - m_freem(rxbuf->m_head); - rxbuf->m_head = NULL; /* mark as freed */ - } - } - - /* Now replenish the mbufs */ - for (int j = 0; j != adapter->num_rx_desc; ++j) { - rxbuf = &rxr->rx_buffers[j]; -#ifdef DEV_NETMAP - if (slot) { - int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j); - uint64_t paddr; - void *addr; - - addr = PNMB(na, slot + si, &paddr); - netmap_load_map(na, rxr->rxtag, rxbuf->map, addr); - rxbuf->paddr = paddr; - em_setup_rxdesc(&rxr->rx_base[j], rxbuf); - continue; - } -#endif /* DEV_NETMAP */ - rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA, - M_PKTHDR, adapter->rx_mbuf_sz); - if (rxbuf->m_head == NULL) { - error = ENOBUFS; - goto fail; - } - rxbuf->m_head->m_len = adapter->rx_mbuf_sz; - rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */ - rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz; - - /* Get the memory mapping */ - error = bus_dmamap_load_mbuf_sg(rxr->rxtag, - rxbuf->map, rxbuf->m_head, seg, - &nsegs, BUS_DMA_NOWAIT); - if (error != 0) { - m_freem(rxbuf->m_head); - rxbuf->m_head = NULL; - goto fail; - } - 
bus_dmamap_sync(rxr->rxtag, - rxbuf->map, BUS_DMASYNC_PREREAD); - - rxbuf->paddr = seg[0].ds_addr; - em_setup_rxdesc(&rxr->rx_base[j], rxbuf); - } - rxr->next_to_check = 0; - rxr->next_to_refresh = 0; - bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - -fail: - EM_RX_UNLOCK(rxr); - return (error); -} - -/********************************************************************* - * - * Initialize all receive rings. - * - **********************************************************************/ -static int -em_setup_receive_structures(struct adapter *adapter) -{ - struct rx_ring *rxr = adapter->rx_rings; - int q; - - for (q = 0; q < adapter->num_queues; q++, rxr++) - if (em_setup_receive_ring(rxr)) - goto fail; - - return (0); -fail: - /* - * Free RX buffers allocated so far, we will only handle - * the rings that completed, the failing case will have - * cleaned up for itself. 'q' failed, so its the terminus. - */ - for (int i = 0; i < q; ++i) { - rxr = &adapter->rx_rings[i]; - for (int n = 0; n < adapter->num_rx_desc; n++) { - struct em_rxbuffer *rxbuf; - rxbuf = &rxr->rx_buffers[n]; - if (rxbuf->m_head != NULL) { - bus_dmamap_sync(rxr->rxtag, rxbuf->map, - BUS_DMASYNC_POSTREAD); - bus_dmamap_unload(rxr->rxtag, rxbuf->map); - m_freem(rxbuf->m_head); - rxbuf->m_head = NULL; - } - } - rxr->next_to_check = 0; - rxr->next_to_refresh = 0; - } - - return (ENOBUFS); -} - -/********************************************************************* - * - * Free all receive rings. 
- * - **********************************************************************/ -static void -em_free_receive_structures(struct adapter *adapter) -{ - struct rx_ring *rxr = adapter->rx_rings; - - for (int i = 0; i < adapter->num_queues; i++, rxr++) { - em_free_receive_buffers(rxr); - /* Free the ring memory as well */ - em_dma_free(adapter, &rxr->rxdma); - EM_RX_LOCK_DESTROY(rxr); - } - - free(adapter->rx_rings, M_DEVBUF); -} - - -/********************************************************************* - * - * Free receive ring data structures - * - **********************************************************************/ -static void -em_free_receive_buffers(struct rx_ring *rxr) -{ - struct adapter *adapter = rxr->adapter; - struct em_rxbuffer *rxbuf = NULL; - - INIT_DEBUGOUT("free_receive_buffers: begin"); - - if (rxr->rx_buffers != NULL) { - for (int i = 0; i < adapter->num_rx_desc; i++) { - rxbuf = &rxr->rx_buffers[i]; - if (rxbuf->map != NULL) { - bus_dmamap_sync(rxr->rxtag, rxbuf->map, - BUS_DMASYNC_POSTREAD); - bus_dmamap_unload(rxr->rxtag, rxbuf->map); - bus_dmamap_destroy(rxr->rxtag, rxbuf->map); - } - if (rxbuf->m_head != NULL) { - m_freem(rxbuf->m_head); - rxbuf->m_head = NULL; - } - } - free(rxr->rx_buffers, M_DEVBUF); - rxr->rx_buffers = NULL; - rxr->next_to_check = 0; - rxr->next_to_refresh = 0; - } - - if (rxr->rxtag != NULL) { - bus_dma_tag_destroy(rxr->rxtag); - rxr->rxtag = NULL; - } - - return; -} - - /********************************************************************* * * Enable receive unit. 
@@ -4552,12 +2916,15 @@ em_free_receive_buffers(struct rx_ring *rxr) **********************************************************************/ static void -em_initialize_receive_unit(struct adapter *adapter) +em_initialize_receive_unit(if_ctx_t ctx) { - struct rx_ring *rxr = adapter->rx_rings; - if_t ifp = adapter->ifp; + struct adapter *adapter = iflib_get_softc(ctx); + if_softc_ctx_t scctx = adapter->shared; + struct ifnet *ifp = iflib_get_ifp(ctx); struct e1000_hw *hw = &adapter->hw; - u32 rctl, rxcsum, rfctl; + struct em_rx_queue *que; + int i; + u32 rctl, rxcsum, rfctl; INIT_DEBUGOUT("em_initialize_receive_units: begin"); @@ -4585,124 +2952,96 @@ em_initialize_receive_unit(struct adapter *adapter) else rctl &= ~E1000_RCTL_LPE; - /* Strip the CRC */ - if (!em_disable_crc_stripping) + /* Strip the CRC */ + if (!em_disable_crc_stripping) rctl |= E1000_RCTL_SECRC; - E1000_WRITE_REG(&adapter->hw, E1000_RADV, - adapter->rx_abs_int_delay.value); + if (adapter->hw.mac.type >= e1000_82540) { + E1000_WRITE_REG(&adapter->hw, E1000_RADV, + adapter->rx_abs_int_delay.value); + /* + * Set the interrupt throttling rate. Value is calculated + * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns) + */ + E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR); + } E1000_WRITE_REG(&adapter->hw, E1000_RDTR, adapter->rx_int_delay.value); - /* - * Set the interrupt throttling rate. 
Value is calculated - * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns) - */ - E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR); /* Use extended rx descriptor formats */ rfctl = E1000_READ_REG(hw, E1000_RFCTL); rfctl |= E1000_RFCTL_EXTEN; /* - ** When using MSIX interrupts we need to throttle - ** using the EITR register (82574 only) - */ + * When using MSIX interrupts we need to throttle + * using the EITR register (82574 only) + */ if (hw->mac.type == e1000_82574) { for (int i = 0; i < 4; i++) E1000_WRITE_REG(hw, E1000_EITR_82574(i), DEFAULT_ITR); /* Disable accelerated acknowledge */ - rfctl |= E1000_RFCTL_ACK_DIS; - } - E1000_WRITE_REG(hw, E1000_RFCTL, rfctl); - - rxcsum = E1000_READ_REG(hw, E1000_RXCSUM); - if (if_getcapenable(ifp) & IFCAP_RXCSUM) { -#ifdef EM_MULTIQUEUE - rxcsum |= E1000_RXCSUM_TUOFL | - E1000_RXCSUM_IPOFL | - E1000_RXCSUM_PCSD; -#else - rxcsum |= E1000_RXCSUM_TUOFL; -#endif - } else - rxcsum &= ~E1000_RXCSUM_TUOFL; - - E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum); - -#ifdef EM_MULTIQUEUE -#define RSSKEYLEN 10 - if (adapter->num_queues > 1) { - uint8_t rss_key[4 * RSSKEYLEN]; - uint32_t reta = 0; - int i; - - /* - * Configure RSS key - */ - arc4rand(rss_key, sizeof(rss_key), 0); - for (i = 0; i < RSSKEYLEN; ++i) { - uint32_t rssrk = 0; - - rssrk = EM_RSSRK_VAL(rss_key, i); - E1000_WRITE_REG(hw,E1000_RSSRK(i), rssrk); - } - - /* - * Configure RSS redirect table in following fashion: - * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)] - */ - for (i = 0; i < sizeof(reta); ++i) { - uint32_t q; + rfctl |= E1000_RFCTL_ACK_DIS; + } + E1000_WRITE_REG(hw, E1000_RFCTL, rfctl); - q = (i % adapter->num_queues) << 7; - reta |= q << (8 * i); + rxcsum = E1000_READ_REG(hw, E1000_RXCSUM); + if (if_getcapenable(ifp) & IFCAP_RXCSUM && + adapter->hw.mac.type >= e1000_82543) { + if (adapter->tx_num_queues > 1) { + if (adapter->hw.mac.type >= igb_mac_min) { + rxcsum |= E1000_RXCSUM_PCSD; + if (hw->mac.type != e1000_82575) + rxcsum |= E1000_RXCSUM_CRCOFL; + } 
else + rxcsum |= E1000_RXCSUM_TUOFL | + E1000_RXCSUM_IPOFL | + E1000_RXCSUM_PCSD; + } else { + if (adapter->hw.mac.type >= igb_mac_min) + rxcsum |= E1000_RXCSUM_IPPCSE; + else + rxcsum |= E1000_RXCSUM_TUOFL | E1000_RXCSUM_IPOFL; + if (adapter->hw.mac.type > e1000_82575) + rxcsum |= E1000_RXCSUM_CRCOFL; } + } else + rxcsum &= ~E1000_RXCSUM_TUOFL; - for (i = 0; i < 32; ++i) { - E1000_WRITE_REG(hw, E1000_RETA(i), reta); - } + E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum); - E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q | - E1000_MRQC_RSS_FIELD_IPV4_TCP | - E1000_MRQC_RSS_FIELD_IPV4 | - E1000_MRQC_RSS_FIELD_IPV6_TCP_EX | - E1000_MRQC_RSS_FIELD_IPV6_EX | - E1000_MRQC_RSS_FIELD_IPV6); + if (adapter->rx_num_queues > 1) { + if (adapter->hw.mac.type >= igb_mac_min) + igb_initialize_rss_mapping(adapter); + else + em_initialize_rss_mapping(adapter); } -#endif + /* - ** XXX TEMPORARY WORKAROUND: on some systems with 82573 - ** long latencies are observed, like Lenovo X60. This - ** change eliminates the problem, but since having positive - ** values in RDTR is a known source of problems on other - ** platforms another solution is being sought. - */ + * XXX TEMPORARY WORKAROUND: on some systems with 82573 + * long latencies are observed, like Lenovo X60. This + * change eliminates the problem, but since having positive + * values in RDTR is a known source of problems on other + * platforms another solution is being sought. 
+ */ if (hw->mac.type == e1000_82573) E1000_WRITE_REG(hw, E1000_RDTR, 0x20); - for (int i = 0; i < adapter->num_queues; i++, rxr++) { + for (i = 0, que = adapter->rx_queues; i < adapter->rx_num_queues; i++, que++) { + struct rx_ring *rxr = &que->rxr; /* Setup the Base and Length of the Rx Descriptor Ring */ - u64 bus_addr = rxr->rxdma.dma_paddr; - u32 rdt = adapter->num_rx_desc - 1; /* default */ + u64 bus_addr = rxr->rx_paddr; +#if 0 + u32 rdt = adapter->rx_num_queues -1; /* default */ +#endif E1000_WRITE_REG(hw, E1000_RDLEN(i), - adapter->num_rx_desc * sizeof(union e1000_rx_desc_extended)); + scctx->isc_nrxd[0] * sizeof(union e1000_rx_desc_extended)); E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32)); E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr); /* Setup the Head and Tail Descriptor Pointers */ E1000_WRITE_REG(hw, E1000_RDH(i), 0); -#ifdef DEV_NETMAP - /* - * an init() while a netmap client is active must - * preserve the rx buffers passed to userspace. - */ - if (if_getcapenable(ifp) & IFCAP_NETMAP) { - struct netmap_adapter *na = netmap_getna(adapter->ifp); - rdt -= nm_kr_rxspace(&na->rx_rings[i]); - } -#endif /* DEV_NETMAP */ - E1000_WRITE_REG(hw, E1000_RDT(i), rdt); + E1000_WRITE_REG(hw, E1000_RDT(i), 0); } /* @@ -4712,6 +3051,7 @@ em_initialize_receive_unit(struct adapter *adapter) * Only write to RXDCTL(1) if there is a need for different * settings. 
*/ + if (((adapter->hw.mac.type == e1000_ich9lan) || (adapter->hw.mac.type == e1000_pch2lan) || (adapter->hw.mac.type == e1000_ich10lan)) && @@ -4719,384 +3059,151 @@ em_initialize_receive_unit(struct adapter *adapter) u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0)); E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3); } else if (adapter->hw.mac.type == e1000_82574) { - for (int i = 0; i < adapter->num_queues; i++) { + for (int i = 0; i < adapter->rx_num_queues; i++) { u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i)); - rxdctl |= 0x20; /* PTHRESH */ rxdctl |= 4 << 8; /* HTHRESH */ rxdctl |= 4 << 16;/* WTHRESH */ rxdctl |= 1 << 24; /* Switch to granularity */ E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl); } - } - - if (adapter->hw.mac.type >= e1000_pch2lan) { - if (if_getmtu(ifp) > ETHERMTU) - e1000_lv_jumbo_workaround_ich8lan(hw, TRUE); - else - e1000_lv_jumbo_workaround_ich8lan(hw, FALSE); - } - - /* Make sure VLAN Filters are off */ - rctl &= ~E1000_RCTL_VFE; - - if (adapter->rx_mbuf_sz == MCLBYTES) - rctl |= E1000_RCTL_SZ_2048; - else if (adapter->rx_mbuf_sz == MJUMPAGESIZE) - rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX; - else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) - rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX; - - /* ensure we clear use DTYPE of 00 here */ - rctl &= ~0x00000C00; - /* Write out the settings */ - E1000_WRITE_REG(hw, E1000_RCTL, rctl); - - return; -} - - -/********************************************************************* - * - * This routine executes in interrupt context. It replenishes - * the mbufs in the descriptor and sends data which has been - * dma'ed into host memory to upper layer. - * - * We loop at most count times if count is > 0, or until done if - * count < 0. 
- * - * For polling we also now return the number of cleaned packets - *********************************************************************/ -static bool -em_rxeof(struct rx_ring *rxr, int count, int *done) -{ - struct adapter *adapter = rxr->adapter; - if_t ifp = adapter->ifp; - struct mbuf *mp, *sendmp; - u32 status = 0; - u16 len; - int i, processed, rxdone = 0; - bool eop; - union e1000_rx_desc_extended *cur; - - EM_RX_LOCK(rxr); - - /* Sync the ring */ - bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, - BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); - - -#ifdef DEV_NETMAP - if (netmap_rx_irq(ifp, rxr->me, &processed)) { - EM_RX_UNLOCK(rxr); - return (FALSE); - } -#endif /* DEV_NETMAP */ - - for (i = rxr->next_to_check, processed = 0; count != 0;) { - if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) - break; - - cur = &rxr->rx_base[i]; - status = le32toh(cur->wb.upper.status_error); - mp = sendmp = NULL; - - if ((status & E1000_RXD_STAT_DD) == 0) - break; - - len = le16toh(cur->wb.upper.length); - eop = (status & E1000_RXD_STAT_EOP) != 0; - - if ((status & E1000_RXDEXT_ERR_FRAME_ERR_MASK) || - (rxr->discard == TRUE)) { - adapter->dropped_pkts++; - ++rxr->rx_discarded; - if (!eop) /* Catch subsequent segs */ - rxr->discard = TRUE; - else - rxr->discard = FALSE; - em_rx_discard(rxr, i); - goto next_desc; - } - bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map); - - /* Assign correct length to the current fragment */ - mp = rxr->rx_buffers[i].m_head; - mp->m_len = len; - - /* Trigger for refresh */ - rxr->rx_buffers[i].m_head = NULL; - - /* First segment? 
*/ - if (rxr->fmp == NULL) { - mp->m_pkthdr.len = len; - rxr->fmp = rxr->lmp = mp; - } else { - /* Chain mbuf's together */ - mp->m_flags &= ~M_PKTHDR; - rxr->lmp->m_next = mp; - rxr->lmp = mp; - rxr->fmp->m_pkthdr.len += len; - } + } else if (adapter->hw.mac.type >= igb_mac_min) { + u32 psize, srrctl = 0; - if (eop) { - --count; - sendmp = rxr->fmp; - if_setrcvif(sendmp, ifp); - if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); - em_receive_checksum(status, sendmp); -#ifndef __NO_STRICT_ALIGNMENT - if (adapter->hw.mac.max_frame_size > - (MCLBYTES - ETHER_ALIGN) && - em_fixup_rx(rxr) != 0) - goto skip; -#endif - if (status & E1000_RXD_STAT_VP) { - if_setvtag(sendmp, - le16toh(cur->wb.upper.vlan)); - sendmp->m_flags |= M_VLANTAG; + if (if_getmtu(ifp) > ETHERMTU) { + /* Set maximum packet len */ + if (adapter->rx_mbuf_sz <= 4096) { + srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT; + rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX; + } else if (adapter->rx_mbuf_sz > 4096) { + srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT; + rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX; } -#ifndef __NO_STRICT_ALIGNMENT -skip: -#endif - rxr->fmp = rxr->lmp = NULL; + psize = scctx->isc_max_frame_size; + /* are we on a vlan? */ + if (ifp->if_vlantrunk != NULL) + psize += VLAN_TAG_SIZE; + E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize); + } else { + srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT; + rctl |= E1000_RCTL_SZ_2048; } -next_desc: - /* Sync the ring */ - bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, - BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); - - /* Zero out the receive descriptors status. */ - cur->wb.upper.status_error &= htole32(~0xFF); - ++rxdone; /* cumulative for POLL */ - ++processed; - - /* Advance our pointers to the next descriptor. 
*/ - if (++i == adapter->num_rx_desc) - i = 0; - - /* Send to the stack */ - if (sendmp != NULL) { - rxr->next_to_check = i; - EM_RX_UNLOCK(rxr); - if_input(ifp, sendmp); - EM_RX_LOCK(rxr); - i = rxr->next_to_check; + + /* + * If TX flow control is disabled and there's >1 queue defined, + * enable DROP. + * + * This drops frames rather than hanging the RX MAC for all queues. + */ + if ((adapter->rx_num_queues > 1) && + (adapter->fc == e1000_fc_none || + adapter->fc == e1000_fc_rx_pause)) { + srrctl |= E1000_SRRCTL_DROP_EN; } + /* Setup the Base and Length of the Rx Descriptor Rings */ + for (i = 0, que = adapter->rx_queues; i < adapter->rx_num_queues; i++, que++) { + struct rx_ring *rxr = &que->rxr; + u64 bus_addr = rxr->rx_paddr; + u32 rxdctl; + +#ifdef notyet + /* Configure for header split? -- ignore for now */ + rxr->hdr_split = igb_header_split; +#else + srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF; +#endif - /* Only refresh mbufs every 8 descriptors */ - if (processed == 8) { - em_refresh_mbufs(rxr, i); - processed = 0; - } + E1000_WRITE_REG(hw, E1000_RDLEN(i), + scctx->isc_nrxd[0] * sizeof(struct e1000_rx_desc)); + E1000_WRITE_REG(hw, E1000_RDBAH(i), + (uint32_t)(bus_addr >> 32)); + E1000_WRITE_REG(hw, E1000_RDBAL(i), + (uint32_t)bus_addr); + E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl); + /* Enable this Queue */ + rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i)); + rxdctl |= E1000_RXDCTL_QUEUE_ENABLE; + rxdctl &= 0xFFF00000; + rxdctl |= IGB_RX_PTHRESH; + rxdctl |= IGB_RX_HTHRESH << 8; + rxdctl |= IGB_RX_WTHRESH << 16; + E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl); + } + } else if (adapter->hw.mac.type >= e1000_pch2lan) { + if (if_getmtu(ifp) > ETHERMTU) + e1000_lv_jumbo_workaround_ich8lan(hw, TRUE); + else + e1000_lv_jumbo_workaround_ich8lan(hw, FALSE); } - /* Catch any remaining refresh work */ - if (e1000_rx_unrefreshed(rxr)) - em_refresh_mbufs(rxr, i); - - rxr->next_to_check = i; - if (done != NULL) - *done = rxdone; - EM_RX_UNLOCK(rxr); - - return ((status & 
E1000_RXD_STAT_DD) ? TRUE : FALSE); -} - -static __inline void -em_rx_discard(struct rx_ring *rxr, int i) -{ - struct em_rxbuffer *rbuf; - - rbuf = &rxr->rx_buffers[i]; - bus_dmamap_unload(rxr->rxtag, rbuf->map); - - /* Free any previous pieces */ - if (rxr->fmp != NULL) { - rxr->fmp->m_flags |= M_PKTHDR; - m_freem(rxr->fmp); - rxr->fmp = NULL; - rxr->lmp = NULL; - } - /* - ** Free buffer and allow em_refresh_mbufs() - ** to clean up and recharge buffer. - */ - if (rbuf->m_head) { - m_free(rbuf->m_head); - rbuf->m_head = NULL; - } - return; -} + /* Make sure VLAN Filters are off */ + rctl &= ~E1000_RCTL_VFE; -#ifndef __NO_STRICT_ALIGNMENT -/* - * When jumbo frames are enabled we should realign entire payload on - * architecures with strict alignment. This is serious design mistake of 8254x - * as it nullifies DMA operations. 8254x just allows RX buffer size to be - * 2048/4096/8192/16384. What we really want is 2048 - ETHER_ALIGN to align its - * payload. On architecures without strict alignment restrictions 8254x still - * performs unaligned memory access which would reduce the performance too. - * To avoid copying over an entire frame to align, we allocate a new mbuf and - * copy ethernet header to the new mbuf. The new mbuf is prepended into the - * existing mbuf chain. - * - * Be aware, best performance of the 8254x is achived only when jumbo frame is - * not used at all on architectures with strict alignment. 
- */ -static int -em_fixup_rx(struct rx_ring *rxr) -{ - struct adapter *adapter = rxr->adapter; - struct mbuf *m, *n; - int error; + if (adapter->hw.mac.type < igb_mac_min) { + if (adapter->rx_mbuf_sz == MCLBYTES) + rctl |= E1000_RCTL_SZ_2048; + else if (adapter->rx_mbuf_sz == MJUMPAGESIZE) + rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX; + else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) + rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX; - error = 0; - m = rxr->fmp; - if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) { - bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len); - m->m_data += ETHER_HDR_LEN; - } else { - MGETHDR(n, M_NOWAIT, MT_DATA); - if (n != NULL) { - bcopy(m->m_data, n->m_data, ETHER_HDR_LEN); - m->m_data += ETHER_HDR_LEN; - m->m_len -= ETHER_HDR_LEN; - n->m_len = ETHER_HDR_LEN; - M_MOVE_PKTHDR(n, m); - n->m_next = m; - rxr->fmp = n; - } else { - adapter->dropped_pkts++; - m_freem(rxr->fmp); - rxr->fmp = NULL; - error = ENOMEM; - } + /* ensure we clear use DTYPE of 00 here */ + rctl &= ~0x00000C00; } - return (error); -} -#endif - -static void -em_setup_rxdesc(union e1000_rx_desc_extended *rxd, const struct em_rxbuffer *rxbuf) -{ - rxd->read.buffer_addr = htole64(rxbuf->paddr); - /* DD bits must be cleared */ - rxd->wb.upper.status_error= 0; -} - -/********************************************************************* - * - * Verify that the hardware indicated that the checksum is valid. - * Inform the stack about the status of checksum so that stack - * doesn't spend time verifying the checksum. 
- * - *********************************************************************/ -static void -em_receive_checksum(uint32_t status, struct mbuf *mp) -{ - mp->m_pkthdr.csum_flags = 0; - - /* Ignore Checksum bit is set */ - if (status & E1000_RXD_STAT_IXSM) - return; - - /* If the IP checksum exists and there is no IP Checksum error */ - if ((status & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) == - E1000_RXD_STAT_IPCS) { - mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID); - } + /* Write out the settings */ + E1000_WRITE_REG(hw, E1000_RCTL, rctl); - /* TCP or UDP checksum */ - if ((status & (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) == - E1000_RXD_STAT_TCPCS) { - mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); - mp->m_pkthdr.csum_data = htons(0xffff); - } - if (status & E1000_RXD_STAT_UDPCS) { - mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); - mp->m_pkthdr.csum_data = htons(0xffff); - } + return; } -/* - * This routine is run via an vlan - * config EVENT - */ static void -em_register_vlan(void *arg, if_t ifp, u16 vtag) +em_if_vlan_register(if_ctx_t ctx, u16 vtag) { - struct adapter *adapter = if_getsoftc(ifp); - u32 index, bit; - - if ((void*)adapter != arg) /* Not our event */ - return; - - if ((vtag == 0) || (vtag > 4095)) /* Invalid ID */ - return; + struct adapter *adapter = iflib_get_softc(ctx); + u32 index, bit; - EM_CORE_LOCK(adapter); index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; adapter->shadow_vfta[index] |= (1 << bit); ++adapter->num_vlans; - /* Re-init to load the changes */ - if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) - em_init_locked(adapter); - EM_CORE_UNLOCK(adapter); } -/* - * This routine is run via an vlan - * unconfig EVENT - */ static void -em_unregister_vlan(void *arg, if_t ifp, u16 vtag) +em_if_vlan_unregister(if_ctx_t ctx, u16 vtag) { - struct adapter *adapter = if_getsoftc(ifp); - u32 index, bit; - - if (adapter != arg) - return; + struct adapter *adapter = iflib_get_softc(ctx); + 
u32 index, bit; - if ((vtag == 0) || (vtag > 4095)) /* Invalid */ - return; - - EM_CORE_LOCK(adapter); index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; adapter->shadow_vfta[index] &= ~(1 << bit); --adapter->num_vlans; - /* Re-init to load the changes */ - if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) - em_init_locked(adapter); - EM_CORE_UNLOCK(adapter); } static void em_setup_vlan_hw_support(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; - u32 reg; + u32 reg; /* - ** We get here thru init_locked, meaning - ** a soft reset, this has already cleared - ** the VFTA and other state, so if there - ** have been no vlan's registered do nothing. - */ + * We get here thru init_locked, meaning + * a soft reset, this has already cleared + * the VFTA and other state, so if there + * have been no vlan's registered do nothing. + */ if (adapter->num_vlans == 0) - return; + return; /* - ** A soft reset zero's out the VFTA, so - ** we need to repopulate it now. - */ + * A soft reset zero's out the VFTA, so + * we need to repopulate it now. 
+ */ for (int i = 0; i < EM_VFTA_SIZE; i++) - if (adapter->shadow_vfta[i] != 0) + if (adapter->shadow_vfta[i] != 0) E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, - i, adapter->shadow_vfta[i]); + i, adapter->shadow_vfta[i]); reg = E1000_READ_REG(hw, E1000_CTRL); reg |= E1000_CTRL_VME; @@ -5110,25 +3217,38 @@ em_setup_vlan_hw_support(struct adapter *adapter) } static void -em_enable_intr(struct adapter *adapter) +em_if_enable_intr(if_ctx_t ctx) { + struct adapter *adapter = iflib_get_softc(ctx); struct e1000_hw *hw = &adapter->hw; u32 ims_mask = IMS_ENABLE_MASK; if (hw->mac.type == e1000_82574) { - E1000_WRITE_REG(hw, EM_EIAC, adapter->ims); + E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK); ims_mask |= adapter->ims; - } + } else if (adapter->intr_type == IFLIB_INTR_MSIX && hw->mac.type >= igb_mac_min) { + u32 mask = (adapter->que_mask | adapter->link_mask); + + E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask); + E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask); + E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask); + ims_mask = E1000_IMS_LSC; + } + E1000_WRITE_REG(hw, E1000_IMS, ims_mask); } static void -em_disable_intr(struct adapter *adapter) +em_if_disable_intr(if_ctx_t ctx) { + struct adapter *adapter = iflib_get_softc(ctx); struct e1000_hw *hw = &adapter->hw; - if (hw->mac.type == e1000_82574) - E1000_WRITE_REG(hw, EM_EIAC, 0); + if (adapter->intr_type == IFLIB_INTR_MSIX) { + if (hw->mac.type >= igb_mac_min) + E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0); + E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0); + } E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff); } @@ -5149,7 +3269,7 @@ em_init_manageability(struct adapter *adapter) /* disable hardware interception of ARP */ manc &= ~(E1000_MANC_ARP_EN); - /* enable receiving management packets to the host */ + /* enable receiving management packets to the host */ manc |= E1000_MANC_EN_MNG2HOST; #define E1000_MNG2HOST_PORT_623 (1 << 5) #define E1000_MNG2HOST_PORT_664 (1 << 6) @@ -5247,19 +3367,38 @@ em_is_valid_ether_addr(u8 *addr) 
** later use. */ static void -em_get_wakeup(device_t dev) +em_get_wakeup(if_ctx_t ctx) { - struct adapter *adapter = device_get_softc(dev); - u16 eeprom_data = 0, device_id, apme_mask; + struct adapter *adapter = iflib_get_softc(ctx); + device_t dev = iflib_get_dev(ctx); + u16 eeprom_data = 0, device_id, apme_mask; adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw); apme_mask = EM_EEPROM_APME; switch (adapter->hw.mac.type) { + case e1000_82542: + case e1000_82543: + break; + case e1000_82544: + e1000_read_nvm(&adapter->hw, + NVM_INIT_CONTROL2_REG, 1, &eeprom_data); + apme_mask = EM_82544_APME; + break; + case e1000_82546: + case e1000_82546_rev_3: + if (adapter->hw.bus.func == 1) { + e1000_read_nvm(&adapter->hw, + NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data); + break; + } else + e1000_read_nvm(&adapter->hw, + NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); + break; case e1000_82573: case e1000_82583: adapter->has_amt = TRUE; - /* Falls thru */ + /* FALLTHROUGH */ case e1000_82571: case e1000_82572: case e1000_80003es2lan: @@ -5278,6 +3417,15 @@ em_get_wakeup(device_t dev) case e1000_pch2lan: case e1000_pch_lpt: case e1000_pch_spt: + case e1000_82575: /* listing all igb devices */ + case e1000_82576: + case e1000_82580: + case e1000_i350: + case e1000_i354: + case e1000_i210: + case e1000_i211: + case e1000_vfadapt: + case e1000_vfadapt_i350: apme_mask = E1000_WUC_APME; adapter->has_amt = TRUE; eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC); @@ -5290,12 +3438,31 @@ em_get_wakeup(device_t dev) if (eeprom_data & apme_mask) adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC); /* - * We have the eeprom settings, now apply the special cases - * where the eeprom may be wrong or the board won't support - * wake on lan on a particular port + * We have the eeprom settings, now apply the special cases + * where the eeprom may be wrong or the board won't support + * wake on lan on a particular port */ device_id = pci_get_device(dev); - switch (device_id) { + switch 
(device_id) { + case E1000_DEV_ID_82546GB_PCIE: + adapter->wol = 0; + break; + case E1000_DEV_ID_82546EB_FIBER: + case E1000_DEV_ID_82546GB_FIBER: + /* Wake events only supported on port A for dual fiber + * regardless of eeprom setting */ + if (E1000_READ_REG(&adapter->hw, E1000_STATUS) & + E1000_STATUS_FUNC_1) + adapter->wol = 0; + break; + case E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3: + /* if quad port adapter, disable WoL on all but port A */ + if (global_quad_port_a != 0) + adapter->wol = 0; + /* Reset for multiple quad port adapters */ + if (++global_quad_port_a == 4) + global_quad_port_a = 0; + break; case E1000_DEV_ID_82571EB_FIBER: /* Wake events only supported on port A for dual fiber * regardless of eeprom setting */ @@ -5306,13 +3473,13 @@ em_get_wakeup(device_t dev) case E1000_DEV_ID_82571EB_QUAD_COPPER: case E1000_DEV_ID_82571EB_QUAD_FIBER: case E1000_DEV_ID_82571EB_QUAD_COPPER_LP: - /* if quad port adapter, disable WoL on all but port A */ + /* if quad port adapter, disable WoL on all but port A */ if (global_quad_port_a != 0) adapter->wol = 0; /* Reset for multiple quad port adapters */ if (++global_quad_port_a == 4) global_quad_port_a = 0; - break; + break; } return; } @@ -5322,12 +3489,13 @@ em_get_wakeup(device_t dev) * Enable PCI Wake On Lan capability */ static void -em_enable_wakeup(device_t dev) +em_enable_wakeup(if_ctx_t ctx) { - struct adapter *adapter = device_get_softc(dev); - if_t ifp = adapter->ifp; - u32 pmc, ctrl, ctrl_ext, rctl, wuc; - u16 status; + struct adapter *adapter = iflib_get_softc(ctx); + device_t dev = iflib_get_dev(ctx); + if_t ifp = iflib_get_ifp(ctx); + u32 pmc, ctrl, ctrl_ext, rctl, wuc; + u16 status; if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0)) return; @@ -5337,8 +3505,8 @@ em_enable_wakeup(device_t dev) ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3); E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl); wuc = E1000_READ_REG(&adapter->hw, E1000_WUC); - wuc |= E1000_WUC_PME_EN; - E1000_WRITE_REG(&adapter->hw, E1000_WUC, 
wuc); + wuc |= (E1000_WUC_PME_EN | E1000_WUC_APME); + E1000_WRITE_REG(&adapter->hw, E1000_WUC, wuc); if ((adapter->hw.mac.type == e1000_ich8lan) || (adapter->hw.mac.type == e1000_pchlan) || @@ -5355,12 +3523,15 @@ em_enable_wakeup(device_t dev) } /* - ** Determine type of Wakeup: note that wol - ** is set with all bits on by default. - */ + * Determine type of Wakeup: note that wol + * is set with all bits on by default. + */ if ((if_getcapenable(ifp) & IFCAP_WOL_MAGIC) == 0) adapter->wol &= ~E1000_WUFC_MAG; + if ((if_getcapenable(ifp) & IFCAP_WOL_UCAST) == 0) + adapter->wol &= ~E1000_WUFC_EX; + if ((if_getcapenable(ifp) & IFCAP_WOL_MCAST) == 0) adapter->wol &= ~E1000_WUFC_MC; else { @@ -5369,10 +3540,7 @@ em_enable_wakeup(device_t dev) E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl); } - if ((adapter->hw.mac.type == e1000_pchlan) || - (adapter->hw.mac.type == e1000_pch2lan) || - (adapter->hw.mac.type == e1000_pch_lpt) || - (adapter->hw.mac.type == e1000_pch_spt)) { + if ( adapter->hw.mac.type >= e1000_pchlan) { if (em_enable_phy_wakeup(adapter)) return; } else { @@ -5383,20 +3551,20 @@ em_enable_wakeup(device_t dev) if (adapter->hw.phy.type == e1000_phy_igp_3) e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw); - /* Request PME */ - status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2); + /* Request PME */ + status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2); status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE); if (if_getcapenable(ifp) & IFCAP_WOL) status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE; - pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2); + pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2); return; } /* -** WOL in the newer chipset interfaces (pchlan) -** require thing to be copied into the phy -*/ + * WOL in the newer chipset interfaces (pchlan) + * require thing to be copied into the phy + */ static int em_enable_phy_wakeup(struct adapter *adapter) { @@ -5437,7 +3605,7 @@ em_enable_phy_wakeup(struct adapter *adapter) /* 
enable PHY wakeup in MAC register */ E1000_WRITE_REG(hw, E1000_WUC, - E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN); + E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN | E1000_WUC_APME); E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol); /* configure and enable PHY wakeup in PHY registers */ @@ -5468,11 +3636,10 @@ out: } static void -em_led_func(void *arg, int onoff) +em_if_led_func(if_ctx_t ctx, int onoff) { - struct adapter *adapter = arg; + struct adapter *adapter = iflib_get_softc(ctx); - EM_CORE_LOCK(adapter); if (onoff) { e1000_setup_led(&adapter->hw); e1000_led_on(&adapter->hw); @@ -5480,26 +3647,25 @@ em_led_func(void *arg, int onoff) e1000_led_off(&adapter->hw); e1000_cleanup_led(&adapter->hw); } - EM_CORE_UNLOCK(adapter); } /* -** Disable the L0S and L1 LINK states -*/ + * Disable the L0S and L1 LINK states + */ static void em_disable_aspm(struct adapter *adapter) { - int base, reg; - u16 link_cap,link_ctrl; - device_t dev = adapter->dev; + int base, reg; + u16 link_cap,link_ctrl; + device_t dev = adapter->dev; switch (adapter->hw.mac.type) { - case e1000_82573: - case e1000_82574: - case e1000_82583: - break; - default: - return; + case e1000_82573: + case e1000_82574: + case e1000_82583: + break; + default: + return; } if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0) return; @@ -5595,27 +3761,26 @@ em_update_stats_counters(struct adapter *adapter) adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC); if (adapter->hw.mac.type >= e1000_82543) { - adapter->stats.algnerrc += + adapter->stats.algnerrc += E1000_READ_REG(&adapter->hw, E1000_ALGNERRC); - adapter->stats.rxerrc += + adapter->stats.rxerrc += E1000_READ_REG(&adapter->hw, E1000_RXERRC); - adapter->stats.tncrs += + adapter->stats.tncrs += E1000_READ_REG(&adapter->hw, E1000_TNCRS); - adapter->stats.cexterr += + adapter->stats.cexterr += E1000_READ_REG(&adapter->hw, E1000_CEXTERR); - adapter->stats.tsctc += + adapter->stats.tsctc += E1000_READ_REG(&adapter->hw, E1000_TSCTC); - adapter->stats.tsctfc += + 
adapter->stats.tsctfc += E1000_READ_REG(&adapter->hw, E1000_TSCTFC); } } static uint64_t -em_get_counter(if_t ifp, ift_counter cnt) +em_if_get_counter(if_ctx_t ctx, ift_counter cnt) { - struct adapter *adapter; - - adapter = if_getsoftc(ifp); + struct adapter *adapter = iflib_get_softc(ctx); + struct ifnet *ifp = iflib_get_ifp(ctx); switch (cnt) { case IFCOUNTER_COLLISIONS: @@ -5651,10 +3816,9 @@ em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS) static void em_add_hw_stats(struct adapter *adapter) { - device_t dev = adapter->dev; - - struct tx_ring *txr = adapter->tx_rings; - struct rx_ring *rxr = adapter->rx_rings; + device_t dev = iflib_get_dev(adapter->ctx); + struct em_tx_queue *tx_que = adapter->tx_queues; + struct em_rx_queue *rx_que = adapter->rx_queues; struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); struct sysctl_oid *tree = device_get_sysctl_tree(dev); @@ -5666,18 +3830,18 @@ em_add_hw_stats(struct adapter *adapter) #define QUEUE_NAME_LEN 32 char namebuf[QUEUE_NAME_LEN]; - + /* Driver Statistics */ - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", + SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", CTLFLAG_RD, &adapter->dropped_pkts, "Driver dropped packets"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq", CTLFLAG_RD, &adapter->link_irq, "Link MSIX IRQ Handled"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail", + SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail", CTLFLAG_RD, &adapter->mbuf_defrag_failed, "Defragmenting mbuf chain failed"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", + SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", CTLFLAG_RD, &adapter->no_tx_dma_setup, "Driver tx dma failure in xmit"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns", @@ -5686,7 +3850,6 @@ em_add_hw_stats(struct adapter *adapter) SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts", CTLFLAG_RD, &adapter->watchdog_events, "Watchdog timeouts"); - SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control", CTLTYPE_UINT | 
CTLFLAG_RD, adapter, E1000_CTRL, em_sysctl_reg_handler, "IU", @@ -5698,44 +3861,48 @@ em_add_hw_stats(struct adapter *adapter) SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water", CTLFLAG_RD, &adapter->hw.fc.high_water, 0, "Flow Control High Watermark"); - SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", + SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", CTLFLAG_RD, &adapter->hw.fc.low_water, 0, "Flow Control Low Watermark"); - for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) { + for (int i = 0; i < adapter->tx_num_queues; i++, tx_que++) { + struct tx_ring *txr = &tx_que->txr; snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i); queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "TX Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); - SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", + SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDH(txr->me), em_sysctl_reg_handler, "IU", - "Transmit Descriptor Head"); - SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", + "Transmit Descriptor Head"); + SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDT(txr->me), em_sysctl_reg_handler, "IU", - "Transmit Descriptor Tail"); + "Transmit Descriptor Tail"); SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq", CTLFLAG_RD, &txr->tx_irq, "Queue MSI-X Transmit Interrupts"); - SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail", + SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail", CTLFLAG_RD, &txr->no_desc_avail, "Queue No Descriptor Available"); + } - snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", i); + for (int j = 0; j < adapter->rx_num_queues; j++, rx_que++) { + struct rx_ring *rxr = &rx_que->rxr; + snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", j); queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "RX Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); - 
SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", + SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDH(rxr->me), em_sysctl_reg_handler, "IU", "Receive Descriptor Head"); - SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", + SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDT(rxr->me), em_sysctl_reg_handler, "IU", @@ -5747,7 +3914,7 @@ em_add_hw_stats(struct adapter *adapter) /* MAC stats get their own sub node */ - stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", + stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", CTLFLAG_RD, NULL, "Statistics"); stat_list = SYSCTL_CHILDREN(stat_node); @@ -5850,14 +4017,14 @@ em_add_hw_stats(struct adapter *adapter) SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522", CTLFLAG_RD, &adapter->stats.prc1522, "1023-1522 byte frames received"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", - CTLFLAG_RD, &adapter->stats.gorc, - "Good Octets Received"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", + CTLFLAG_RD, &adapter->stats.gorc, + "Good Octets Received"); /* Packet Transmission Stats */ - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", - CTLFLAG_RD, &adapter->stats.gotc, - "Good Octets Transmitted"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", + CTLFLAG_RD, &adapter->stats.gotc, + "Good Octets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd", CTLFLAG_RD, &adapter->stats.tpt, "Total Packets Transmitted"); @@ -5973,8 +4140,8 @@ em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS) static void em_print_nvm_info(struct adapter *adapter) { - u16 eeprom_data; - int i, j, row = 0; + u16 eeprom_data; + int i, j, row = 0; /* Its a bit crude, but it gets the job done */ printf("\nInterface EEPROM Dump:\n"); @@ -5998,7 +4165,7 @@ em_sysctl_int_delay(SYSCTL_HANDLER_ARGS) u32 regval; int error, usecs, ticks; - info = 
(struct em_int_delay_info *)arg1; + info = (struct em_int_delay_info *) arg1; usecs = info->value; error = sysctl_handle_int(oidp, &usecs, 0, req); if (error != 0 || req->newptr == NULL) @@ -6011,8 +4178,7 @@ em_sysctl_int_delay(SYSCTL_HANDLER_ARGS) ticks *= 4; adapter = info->adapter; - - EM_CORE_LOCK(adapter); + regval = E1000_READ_OFFSET(&adapter->hw, info->offset); regval = (regval & ~0xffff) | (ticks & 0xffff); /* Handle a few special cases. */ @@ -6029,7 +4195,6 @@ em_sysctl_int_delay(SYSCTL_HANDLER_ARGS) break; } E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval); - EM_CORE_UNLOCK(adapter); return (0); } @@ -6047,78 +4212,66 @@ em_add_int_delay_sysctl(struct adapter *adapter, const char *name, info, 0, em_sysctl_int_delay, "I", description); } -static void -em_set_sysctl_value(struct adapter *adapter, const char *name, - const char *description, int *limit, int value) -{ - *limit = value; - SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev), - SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), - OID_AUTO, name, CTLFLAG_RW, limit, value, description); -} - - /* -** Set flow control using sysctl: -** Flow control values: -** 0 - off -** 1 - rx pause -** 2 - tx pause -** 3 - full -*/ + * Set flow control using sysctl: + * Flow control values: + * 0 - off + * 1 - rx pause + * 2 - tx pause + * 3 - full + */ static int em_set_flowcntl(SYSCTL_HANDLER_ARGS) -{ - int error; - static int input = 3; /* default is full */ - struct adapter *adapter = (struct adapter *) arg1; - - error = sysctl_handle_int(oidp, &input, 0, req); - - if ((error) || (req->newptr == NULL)) - return (error); - +{ + int error; + static int input = 3; /* default is full */ + struct adapter *adapter = (struct adapter *) arg1; + + error = sysctl_handle_int(oidp, &input, 0, req); + + if ((error) || (req->newptr == NULL)) + return (error); + if (input == adapter->fc) /* no change? 
*/ return (error); - switch (input) { - case e1000_fc_rx_pause: - case e1000_fc_tx_pause: - case e1000_fc_full: - case e1000_fc_none: - adapter->hw.fc.requested_mode = input; - adapter->fc = input; - break; - default: - /* Do nothing */ - return (error); - } + switch (input) { + case e1000_fc_rx_pause: + case e1000_fc_tx_pause: + case e1000_fc_full: + case e1000_fc_none: + adapter->hw.fc.requested_mode = input; + adapter->fc = input; + break; + default: + /* Do nothing */ + return (error); + } - adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode; - e1000_force_mac_fc(&adapter->hw); - return (error); + adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode; + e1000_force_mac_fc(&adapter->hw); + return (error); } /* -** Manage Energy Efficient Ethernet: -** Control values: -** 0/1 - enabled/disabled -*/ + * Manage Energy Efficient Ethernet: + * Control values: + * 0/1 - enabled/disabled + */ static int em_sysctl_eee(SYSCTL_HANDLER_ARGS) { - struct adapter *adapter = (struct adapter *) arg1; - int error, value; - - value = adapter->hw.dev_spec.ich8lan.eee_disable; - error = sysctl_handle_int(oidp, &value, 0, req); - if (error || req->newptr == NULL) - return (error); - EM_CORE_LOCK(adapter); - adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0); - em_init_locked(adapter); - EM_CORE_UNLOCK(adapter); - return (0); + struct adapter *adapter = (struct adapter *) arg1; + int error, value; + + value = adapter->hw.dev_spec.ich8lan.eee_disable; + error = sysctl_handle_int(oidp, &value, 0, req); + if (error || req->newptr == NULL) + return (error); + adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0); + em_if_init(adapter->ctx); + + return (0); } static int @@ -6135,66 +4288,84 @@ em_sysctl_debug_info(SYSCTL_HANDLER_ARGS) return (error); if (result == 1) { - adapter = (struct adapter *)arg1; + adapter = (struct adapter *) arg1; em_print_debug_info(adapter); } return (error); } +static int +em_get_rs(SYSCTL_HANDLER_ARGS) +{ + struct adapter *adapter = 
(struct adapter *) arg1; + int error; + int result; + + result = 0; + error = sysctl_handle_int(oidp, &result, 0, req); + + if (error || !req->newptr || result != 1) + return (error); + em_dump_rs(adapter); + + return (error); +} + +static void +em_if_debug(if_ctx_t ctx) +{ + em_dump_rs(iflib_get_softc(ctx)); +} + /* -** This routine is meant to be fluid, add whatever is -** needed for debugging a problem. -jfv -*/ + * This routine is meant to be fluid, add whatever is + * needed for debugging a problem. -jfv + */ static void em_print_debug_info(struct adapter *adapter) { - device_t dev = adapter->dev; - struct tx_ring *txr = adapter->tx_rings; - struct rx_ring *rxr = adapter->rx_rings; + device_t dev = iflib_get_dev(adapter->ctx); + struct ifnet *ifp = iflib_get_ifp(adapter->ctx); + struct tx_ring *txr = &adapter->tx_queues->txr; + struct rx_ring *rxr = &adapter->rx_queues->rxr; - if (if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING) + if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) printf("Interface is RUNNING "); else printf("Interface is NOT RUNNING\n"); - if (if_getdrvflags(adapter->ifp) & IFF_DRV_OACTIVE) + if (if_getdrvflags(ifp) & IFF_DRV_OACTIVE) printf("and INACTIVE\n"); else printf("and ACTIVE\n"); - for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) { + for (int i = 0; i < adapter->tx_num_queues; i++, txr++) { device_printf(dev, "TX Queue %d ------\n", i); device_printf(dev, "hw tdh = %d, hw tdt = %d\n", - E1000_READ_REG(&adapter->hw, E1000_TDH(i)), - E1000_READ_REG(&adapter->hw, E1000_TDT(i))); - device_printf(dev, "Tx Queue Status = %d\n", txr->busy); - device_printf(dev, "TX descriptors avail = %d\n", - txr->tx_avail); - device_printf(dev, "Tx Descriptors avail failure = %ld\n", - txr->no_desc_avail); - device_printf(dev, "RX Queue %d ------\n", i); + E1000_READ_REG(&adapter->hw, E1000_TDH(i)), + E1000_READ_REG(&adapter->hw, E1000_TDT(i))); + + } + for (int j=0; j < adapter->rx_num_queues; j++, rxr++) { + device_printf(dev, "RX Queue %d ------\n", 
j); device_printf(dev, "hw rdh = %d, hw rdt = %d\n", - E1000_READ_REG(&adapter->hw, E1000_RDH(i)), - E1000_READ_REG(&adapter->hw, E1000_RDT(i))); - device_printf(dev, "RX discarded packets = %ld\n", - rxr->rx_discarded); - device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check); - device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh); + E1000_READ_REG(&adapter->hw, E1000_RDH(j)), + E1000_READ_REG(&adapter->hw, E1000_RDT(j))); } } -#ifdef EM_MULTIQUEUE /* * 82574 only: * Write a new value to the EEPROM increasing the number of MSIX * vectors from 3 to 5, for proper multiqueue support. */ static void -em_enable_vectors_82574(struct adapter *adapter) +em_enable_vectors_82574(if_ctx_t ctx) { + struct adapter *adapter = iflib_get_softc(ctx); struct e1000_hw *hw = &adapter->hw; - device_t dev = adapter->dev; + device_t dev = iflib_get_dev(ctx); u16 edata; e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata); @@ -6209,42 +4380,3 @@ em_enable_vectors_82574(struct adapter *adapter) device_printf(dev, "Writing to eeprom: done\n"); } } -#endif - -#ifdef DDB -DB_COMMAND(em_reset_dev, em_ddb_reset_dev) -{ - devclass_t dc; - int max_em; - - dc = devclass_find("em"); - max_em = devclass_get_maxunit(dc); - - for (int index = 0; index < (max_em - 1); index++) { - device_t dev; - dev = devclass_get_device(dc, index); - if (device_get_driver(dev) == &em_driver) { - struct adapter *adapter = device_get_softc(dev); - EM_CORE_LOCK(adapter); - em_init_locked(adapter); - EM_CORE_UNLOCK(adapter); - } - } -} -DB_COMMAND(em_dump_queue, em_ddb_dump_queue) -{ - devclass_t dc; - int max_em; - - dc = devclass_find("em"); - max_em = devclass_get_maxunit(dc); - - for (int index = 0; index < (max_em - 1); index++) { - device_t dev; - dev = devclass_get_device(dc, index); - if (device_get_driver(dev) == &em_driver) - em_print_debug_info(device_get_softc(dev)); - } - -} -#endif diff --git a/freebsd/sys/dev/e1000/if_em.h b/freebsd/sys/dev/e1000/if_em.h index 2a2bf2cc..eb353700 
100644 --- a/freebsd/sys/dev/e1000/if_em.h +++ b/freebsd/sys/dev/e1000/if_em.h @@ -1,36 +1,92 @@ -/****************************************************************************** - - Copyright (c) 2001-2015, Intel Corporation - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - 3. Neither the name of the Intel Corporation nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE. - -******************************************************************************/ +/*- + * Copyright (c) 2016 Matt Macy + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + /*$FreeBSD$*/ +#include +#include +#include + +#ifdef HAVE_KERNEL_OPTION_HEADERS +#include +#endif + +#include +#include +#ifdef DDB +#include +#include +#endif +#if __FreeBSD_version >= 800000 +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "e1000_api.h" +#include "e1000_82571.h" +#include #ifndef _EM_H_DEFINED_ @@ -51,13 +107,10 @@ * desscriptors should meet the following condition. * (num_tx_desc * sizeof(struct e1000_tx_desc)) % 128 == 0 */ -#define EM_MIN_TXD 80 +#define EM_MIN_TXD 128 #define EM_MAX_TXD 4096 -#ifdef EM_MULTIQUEUE -#define EM_DEFAULT_TXD 4096 -#else -#define EM_DEFAULT_TXD 1024 -#endif +#define EM_DEFAULT_TXD 1024 +#define EM_DEFAULT_MULTI_TXD 4096 /* * EM_RXD - Maximum number of receive Descriptors @@ -72,13 +125,10 @@ * desscriptors should meet the following condition. * (num_tx_desc * sizeof(struct e1000_tx_desc)) % 128 == 0 */ -#define EM_MIN_RXD 80 +#define EM_MIN_RXD 128 #define EM_MAX_RXD 4096 -#ifdef EM_MULTIQUEUE -#define EM_DEFAULT_RXD 4096 -#else -#define EM_DEFAULT_RXD 1024 -#endif +#define EM_DEFAULT_RXD 1024 +#define EM_DEFAULT_MULTI_RXD 4096 /* * EM_TIDV - Transmit Interrupt Delay Value @@ -125,11 +175,7 @@ * restoring the network connection. To eliminate the potential * for the hang ensure that EM_RDTR is set to 0. */ -#ifdef EM_MULTIQUEUE -#define EM_RDTR 64 -#else #define EM_RDTR 0 -#endif /* * Receive Interrupt Absolute Delay Timer (Not valid for 82542/82543/82544) @@ -142,22 +188,7 @@ * along with EM_RDTR, may improve traffic throughput in specific network * conditions. 
*/ -#ifdef EM_MULTIQUEUE -#define EM_RADV 128 -#else #define EM_RADV 64 -#endif - -/* - * This parameter controls the max duration of transmit watchdog. - */ -#define EM_WATCHDOG (10 * hz) - -/* - * This parameter controls when the driver calls the routine to reclaim - * transmit descriptors. - */ -#define EM_TX_CLEANUP_THRESHOLD (adapter->num_tx_desc / 8) /* * This parameter controls whether or not autonegotation is enabled. @@ -221,6 +252,18 @@ #define PCICFG_DESC_RING_STATUS 0xe4 #define FLUSH_DESC_REQUIRED 0x100 + +#define IGB_RX_PTHRESH ((hw->mac.type == e1000_i354) ? 12 : \ + ((hw->mac.type <= e1000_82576) ? 16 : 8)) +#define IGB_RX_HTHRESH 8 +#define IGB_RX_WTHRESH ((hw->mac.type == e1000_82576 && \ + (adapter->intr_type == IFLIB_INTR_MSIX)) ? 1 : 4) + +#define IGB_TX_PTHRESH ((hw->mac.type == e1000_i354) ? 20 : 8) +#define IGB_TX_HTHRESH 1 +#define IGB_TX_WTHRESH ((hw->mac.type != e1000_82575 && \ + (adapter->intr_type == IFLIB_INTR_MSIX) ? 1 : 16) + /* * TDBA/RDBA should be aligned on 16 byte boundary. But TDLEN/RDLEN should be * multiple of 128 bytes. So we align TDBA/RDBA on 128 byte boundary. 
This will @@ -242,6 +285,7 @@ #define EM_BAR_TYPE(v) ((v) & EM_BAR_TYPE_MASK) #define EM_BAR_TYPE_MASK 0x00000001 #define EM_BAR_TYPE_MMEM 0x00000000 +#define EM_BAR_TYPE_IO 0x00000001 #define EM_BAR_TYPE_FLASH 0x0014 #define EM_BAR_MEM_TYPE(v) ((v) & EM_BAR_MEM_TYPE_MASK) #define EM_BAR_MEM_TYPE_MASK 0x00000006 @@ -277,7 +321,11 @@ #define EM_MSIX_LINK 0x01000000 /* For 82574 use */ #define ETH_ZLEN 60 #define ETH_ADDR_LEN 6 -#define CSUM_OFFLOAD 7 /* Offload bits in mbuf flag */ +#define EM_CSUM_OFFLOAD 7 /* Offload bits in mbuf flag */ +#define IGB_CSUM_OFFLOAD 0x0E0F /* Offload bits in mbuf flag */ + +#define IGB_PKTTYPE_MASK 0x0000FFF0 +#define IGB_DMCTLX_DCFLUSH_DIS 0x80000000 /* Disable DMA Coalesce Flush */ /* * 82574 has a nonstandard address for EIAC @@ -295,19 +343,6 @@ #define EM_NVM_MSIX_N_MASK (0x7 << EM_NVM_MSIX_N_SHIFT) #define EM_NVM_MSIX_N_SHIFT 7 -/* - * Bus dma allocation structure used by - * e1000_dma_malloc and e1000_dma_free. - */ -struct em_dma_alloc { - bus_addr_t dma_paddr; - caddr_t dma_vaddr; - bus_dma_tag_t dma_tag; - bus_dmamap_t dma_map; - bus_dma_segment_t dma_seg; - int dma_nseg; -}; - struct adapter; struct em_int_delay_info { @@ -321,35 +356,31 @@ struct em_int_delay_info { */ struct tx_ring { struct adapter *adapter; - struct mtx tx_mtx; - char mtx_name[16]; - u32 me; - u32 msix; - u32 ims; - int busy; - struct em_dma_alloc txdma; struct e1000_tx_desc *tx_base; - struct task tx_task; - struct taskqueue *tq; - u32 next_avail_desc; - u32 next_to_clean; - struct em_txbuffer *tx_buffers; - volatile u16 tx_avail; - u32 tx_tso; /* last tx was tso */ - u16 last_hw_offload; - u8 last_hw_ipcso; - u8 last_hw_ipcss; - u8 last_hw_tucso; - u8 last_hw_tucss; -#if __FreeBSD_version >= 800000 - struct buf_ring *br; -#endif + uint64_t tx_paddr; + qidx_t *tx_rsq; + bool tx_tso; /* last tx was tso */ + uint8_t me; + qidx_t tx_rs_cidx; + qidx_t tx_rs_pidx; + qidx_t tx_cidx_processed; /* Interrupt resources */ - bus_dma_tag_t txtag; void *tag; 
struct resource *res; unsigned long tx_irq; unsigned long no_desc_avail; + + /* Saved csum offloading context information */ + int csum_flags; + int csum_lhlen; + int csum_iphlen; + + int csum_thlen; + int csum_mss; + int csum_pktlen; + + uint32_t csum_txd_upper; + uint32_t csum_txd_lower; /* last field */ }; /* @@ -357,26 +388,15 @@ struct tx_ring { */ struct rx_ring { struct adapter *adapter; + struct em_rx_queue *que; u32 me; - u32 msix; - u32 ims; - struct mtx rx_mtx; - char mtx_name[16]; u32 payload; - struct task rx_task; - struct taskqueue *tq; union e1000_rx_desc_extended *rx_base; - struct em_dma_alloc rxdma; - u32 next_to_refresh; - u32 next_to_check; - struct em_rxbuffer *rx_buffers; - struct mbuf *fmp; - struct mbuf *lmp; + uint64_t rx_paddr; /* Interrupt resources */ void *tag; struct resource *res; - bus_dma_tag_t rxtag; bool discard; /* Soft stats */ @@ -386,62 +406,68 @@ struct rx_ring { unsigned long rx_bytes; }; +struct em_tx_queue { + struct adapter *adapter; + u32 msix; + u32 eims; /* This queue's EIMS bit */ + u32 me; + struct tx_ring txr; +}; + +struct em_rx_queue { + struct adapter *adapter; + u32 me; + u32 msix; + u32 eims; + struct rx_ring rxr; + u64 irqs; + struct if_irq que_irq; +}; /* Our adapter structure */ struct adapter { - if_t ifp; + struct ifnet *ifp; struct e1000_hw hw; + if_softc_ctx_t shared; + if_ctx_t ctx; +#define tx_num_queues shared->isc_ntxqsets +#define rx_num_queues shared->isc_nrxqsets +#define intr_type shared->isc_intr /* FreeBSD operating-system-specific structures. 
*/ struct e1000_osdep osdep; device_t dev; struct cdev *led_dev; + struct em_tx_queue *tx_queues; + struct em_rx_queue *rx_queues; + struct if_irq irq; + struct resource *memory; struct resource *flash; - struct resource *msix_mem; + struct resource *ioport; + int io_rid; struct resource *res; void *tag; u32 linkvec; u32 ivars; - struct ifmedia media; - struct callout timer; + struct ifmedia *media; int msix; int if_flags; - int max_frame_size; int min_frame_size; - struct mtx core_mtx; int em_insert_vlan_header; u32 ims; bool in_detach; /* Task for FAST handling */ - struct task link_task; - struct task que_task; - struct taskqueue *tq; /* private task queue */ - - eventhandler_tag vlan_attach; - eventhandler_tag vlan_detach; - - u16 num_vlans; - u8 num_queues; - - /* - * Transmit rings: - * Allocated at run time, an array of rings. - */ - struct tx_ring *tx_rings; - int num_tx_desc; + struct grouptask link_task; + + u16 num_vlans; u32 txd_cmd; - /* - * Receive rings: - * Allocated at run time, an array of rings. 
- */ - struct rx_ring *rx_rings; - int num_rx_desc; + u32 tx_process_limit; u32 rx_process_limit; u32 rx_mbuf_sz; @@ -467,7 +493,12 @@ struct adapter { u16 link_speed; u16 link_duplex; u32 smartspeed; + u32 dmac; + int link_mask; + + u64 que_mask; + struct em_int_delay_info tx_int_delay; struct em_int_delay_info tx_abs_int_delay; struct em_int_delay_info rx_int_delay; @@ -501,34 +532,7 @@ typedef struct _em_vendor_info_t { unsigned int index; } em_vendor_info_t; -struct em_txbuffer { - int next_eop; /* Index of the desc to watch */ - struct mbuf *m_head; - bus_dmamap_t map; /* bus_dma map for packet */ -}; - -struct em_rxbuffer { - int next_eop; /* Index of the desc to watch */ - struct mbuf *m_head; - bus_dmamap_t map; /* bus_dma map for packet */ - bus_addr_t paddr; -}; - - -/* -** Find the number of unrefreshed RX descriptors -*/ -static inline u16 -e1000_rx_unrefreshed(struct rx_ring *rxr) -{ - struct adapter *adapter = rxr->adapter; - - if (rxr->next_to_check > rxr->next_to_refresh) - return (rxr->next_to_check - rxr->next_to_refresh - 1); - else - return ((adapter->num_rx_desc + rxr->next_to_check) - - rxr->next_to_refresh - 1); -} +void em_dump_rs(struct adapter *); #define EM_CORE_LOCK_INIT(_sc, _name) \ mtx_init(&(_sc)->core_mtx, _name, "EM Core Lock", MTX_DEF) diff --git a/freebsd/sys/dev/e1000/if_igb.c b/freebsd/sys/dev/e1000/if_igb.c deleted file mode 100644 index 2e9c7259..00000000 --- a/freebsd/sys/dev/e1000/if_igb.c +++ /dev/null @@ -1,6452 +0,0 @@ -#include - -/****************************************************************************** - - Copyright (c) 2001-2015, Intel Corporation - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - 2. 
Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - 3. Neither the name of the Intel Corporation nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE. 
- -******************************************************************************/ -/*$FreeBSD$*/ - - -#include -#include -#include - -#ifdef HAVE_KERNEL_OPTION_HEADERS -#include -#include -#endif - -#include "if_igb.h" - -/********************************************************************* - * Driver version: - *********************************************************************/ -char igb_driver_version[] = "2.5.3-k"; - - -/********************************************************************* - * PCI Device ID Table - * - * Used by probe to select devices to load on - * Last field stores an index into e1000_strings - * Last entry must be all 0s - * - * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index } - *********************************************************************/ - -static igb_vendor_info_t igb_vendor_info_array[] = -{ - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_COPPER, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_FIBER_SERDES, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575GB_QUAD_COPPER, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS_SERDES, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_FIBER, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES_QUAD, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER_ET2, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_VF, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_FIBER, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SERDES, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SGMII, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER_DUAL, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_QUAD_FIBER, 0, 0, 0}, - 
{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SERDES, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SGMII, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SFP, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_BACKPLANE, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_COPPER, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_FIBER, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SERDES, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SGMII, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_VF, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_IT, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_OEM1, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_FLASHLESS, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SERDES_FLASHLESS, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_FIBER, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SERDES, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SGMII, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I211_COPPER, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_1GBPS, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS, 0, 0, 0}, - {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_SGMII, 0, 0, 0}, - /* required last entry */ - {0, 0, 0, 0, 0} -}; - -/********************************************************************* - * Table of branding strings for all supported NICs. 
- *********************************************************************/ - -static char *igb_strings[] = { - "Intel(R) PRO/1000 Network Connection" -}; - -/********************************************************************* - * Function prototypes - *********************************************************************/ -static int igb_probe(device_t); -static int igb_attach(device_t); -static int igb_detach(device_t); -static int igb_shutdown(device_t); -static int igb_suspend(device_t); -static int igb_resume(device_t); -#ifndef IGB_LEGACY_TX -static int igb_mq_start(struct ifnet *, struct mbuf *); -static int igb_mq_start_locked(struct ifnet *, struct tx_ring *); -static void igb_qflush(struct ifnet *); -static void igb_deferred_mq_start(void *, int); -#else -static void igb_start(struct ifnet *); -static void igb_start_locked(struct tx_ring *, struct ifnet *ifp); -#endif -static int igb_ioctl(struct ifnet *, u_long, caddr_t); -static uint64_t igb_get_counter(if_t, ift_counter); -static void igb_init(void *); -static void igb_init_locked(struct adapter *); -static void igb_stop(void *); -static void igb_media_status(struct ifnet *, struct ifmediareq *); -static int igb_media_change(struct ifnet *); -static void igb_identify_hardware(struct adapter *); -static int igb_allocate_pci_resources(struct adapter *); -static int igb_allocate_msix(struct adapter *); -static int igb_allocate_legacy(struct adapter *); -static int igb_setup_msix(struct adapter *); -static void igb_free_pci_resources(struct adapter *); -static void igb_local_timer(void *); -static void igb_reset(struct adapter *); -static int igb_setup_interface(device_t, struct adapter *); -static int igb_allocate_queues(struct adapter *); -static void igb_configure_queues(struct adapter *); - -static int igb_allocate_transmit_buffers(struct tx_ring *); -static void igb_setup_transmit_structures(struct adapter *); -static void igb_setup_transmit_ring(struct tx_ring *); -static void 
igb_initialize_transmit_units(struct adapter *); -static void igb_free_transmit_structures(struct adapter *); -static void igb_free_transmit_buffers(struct tx_ring *); - -static int igb_allocate_receive_buffers(struct rx_ring *); -static int igb_setup_receive_structures(struct adapter *); -static int igb_setup_receive_ring(struct rx_ring *); -static void igb_initialize_receive_units(struct adapter *); -static void igb_free_receive_structures(struct adapter *); -static void igb_free_receive_buffers(struct rx_ring *); -static void igb_free_receive_ring(struct rx_ring *); - -static void igb_enable_intr(struct adapter *); -static void igb_disable_intr(struct adapter *); -static void igb_update_stats_counters(struct adapter *); -static bool igb_txeof(struct tx_ring *); - -static __inline void igb_rx_discard(struct rx_ring *, int); -static __inline void igb_rx_input(struct rx_ring *, - struct ifnet *, struct mbuf *, u32); - -static bool igb_rxeof(struct igb_queue *, int, int *); -static void igb_rx_checksum(u32, struct mbuf *, u32); -static int igb_tx_ctx_setup(struct tx_ring *, - struct mbuf *, u32 *, u32 *); -static int igb_tso_setup(struct tx_ring *, - struct mbuf *, u32 *, u32 *); -static void igb_set_promisc(struct adapter *); -static void igb_disable_promisc(struct adapter *); -static void igb_set_multi(struct adapter *); -static void igb_update_link_status(struct adapter *); -static void igb_refresh_mbufs(struct rx_ring *, int); - -static void igb_register_vlan(void *, struct ifnet *, u16); -static void igb_unregister_vlan(void *, struct ifnet *, u16); -static void igb_setup_vlan_hw_support(struct adapter *); - -static int igb_xmit(struct tx_ring *, struct mbuf **); -static int igb_dma_malloc(struct adapter *, bus_size_t, - struct igb_dma_alloc *, int); -static void igb_dma_free(struct adapter *, struct igb_dma_alloc *); -static int igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS); -static void igb_print_nvm_info(struct adapter *); -static int igb_is_valid_ether_addr(u8 
*); -static void igb_add_hw_stats(struct adapter *); - -static void igb_vf_init_stats(struct adapter *); -static void igb_update_vf_stats_counters(struct adapter *); - -/* Management and WOL Support */ -static void igb_init_manageability(struct adapter *); -static void igb_release_manageability(struct adapter *); -static void igb_get_hw_control(struct adapter *); -static void igb_release_hw_control(struct adapter *); -static void igb_enable_wakeup(device_t); -static void igb_led_func(void *, int); - -static int igb_irq_fast(void *); -static void igb_msix_que(void *); -static void igb_msix_link(void *); -static void igb_handle_que(void *context, int pending); -static void igb_handle_link(void *context, int pending); -static void igb_handle_link_locked(struct adapter *); - -static void igb_set_sysctl_value(struct adapter *, const char *, - const char *, int *, int); -static int igb_set_flowcntl(SYSCTL_HANDLER_ARGS); -static int igb_sysctl_dmac(SYSCTL_HANDLER_ARGS); -static int igb_sysctl_eee(SYSCTL_HANDLER_ARGS); - -#ifdef DEVICE_POLLING -static poll_handler_t igb_poll; -#endif /* POLLING */ - -/********************************************************************* - * FreeBSD Device Interface Entry Points - *********************************************************************/ - -static device_method_t igb_methods[] = { - /* Device interface */ - DEVMETHOD(device_probe, igb_probe), - DEVMETHOD(device_attach, igb_attach), - DEVMETHOD(device_detach, igb_detach), - DEVMETHOD(device_shutdown, igb_shutdown), - DEVMETHOD(device_suspend, igb_suspend), - DEVMETHOD(device_resume, igb_resume), - DEVMETHOD_END -}; - -static driver_t igb_driver = { - "igb", igb_methods, sizeof(struct adapter), -}; - -static devclass_t igb_devclass; -DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0); -MODULE_DEPEND(igb, pci, 1, 1, 1); -MODULE_DEPEND(igb, ether, 1, 1, 1); -#ifdef DEV_NETMAP -MODULE_DEPEND(igb, netmap, 1, 1, 1); -#endif /* DEV_NETMAP */ - 
-/********************************************************************* - * Tunable default values. - *********************************************************************/ - -static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters"); - -/* Descriptor defaults */ -static int igb_rxd = IGB_DEFAULT_RXD; -static int igb_txd = IGB_DEFAULT_TXD; -SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0, - "Number of receive descriptors per queue"); -SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0, - "Number of transmit descriptors per queue"); - -/* -** AIM: Adaptive Interrupt Moderation -** which means that the interrupt rate -** is varied over time based on the -** traffic for that interrupt vector -*/ -static int igb_enable_aim = TRUE; -SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RWTUN, &igb_enable_aim, 0, - "Enable adaptive interrupt moderation"); - -/* - * MSIX should be the default for best performance, - * but this allows it to be forced off for testing. - */ -static int igb_enable_msix = 1; -SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0, - "Enable MSI-X interrupts"); - -/* -** Tuneable Interrupt rate -*/ -static int igb_max_interrupt_rate = 8000; -SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN, - &igb_max_interrupt_rate, 0, "Maximum interrupts per second"); - -#ifndef IGB_LEGACY_TX -/* -** Tuneable number of buffers in the buf-ring (drbr_xxx) -*/ -static int igb_buf_ring_size = IGB_BR_SIZE; -SYSCTL_INT(_hw_igb, OID_AUTO, buf_ring_size, CTLFLAG_RDTUN, - &igb_buf_ring_size, 0, "Size of the bufring"); -#endif - -/* -** Header split causes the packet header to -** be dma'd to a separate mbuf from the payload. -** this can have memory alignment benefits. But -** another plus is that small packets often fit -** into the header and thus use no cluster. Its -** a very workload dependent type feature. 
-*/ -static int igb_header_split = FALSE; -SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0, - "Enable receive mbuf header split"); - -/* -** This will autoconfigure based on the -** number of CPUs and max supported -** MSIX messages if left at 0. -*/ -static int igb_num_queues = 0; -SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0, - "Number of queues to configure, 0 indicates autoconfigure"); - -/* -** Global variable to store last used CPU when binding queues -** to CPUs in igb_allocate_msix. Starts at CPU_FIRST and increments when a -** queue is bound to a cpu. -*/ -static int igb_last_bind_cpu = -1; - -/* How many packets rxeof tries to clean at a time */ -static int igb_rx_process_limit = 100; -SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN, - &igb_rx_process_limit, 0, - "Maximum number of received packets to process at a time, -1 means unlimited"); - -/* How many packets txeof tries to clean at a time */ -static int igb_tx_process_limit = -1; -SYSCTL_INT(_hw_igb, OID_AUTO, tx_process_limit, CTLFLAG_RDTUN, - &igb_tx_process_limit, 0, - "Maximum number of sent packets to process at a time, -1 means unlimited"); - -#ifdef DEV_NETMAP /* see ixgbe.c for details */ -#include -#endif /* DEV_NETMAP */ -/********************************************************************* - * Device identification routine - * - * igb_probe determines if the driver should be loaded on - * adapter based on PCI vendor/device id of the adapter. 
- * - * return BUS_PROBE_DEFAULT on success, positive on failure - *********************************************************************/ - -static int -igb_probe(device_t dev) -{ - char adapter_name[256]; - uint16_t pci_vendor_id = 0; - uint16_t pci_device_id = 0; - uint16_t pci_subvendor_id = 0; - uint16_t pci_subdevice_id = 0; - igb_vendor_info_t *ent; - - INIT_DEBUGOUT("igb_probe: begin"); - - pci_vendor_id = pci_get_vendor(dev); - if (pci_vendor_id != IGB_INTEL_VENDOR_ID) - return (ENXIO); - - pci_device_id = pci_get_device(dev); - pci_subvendor_id = pci_get_subvendor(dev); - pci_subdevice_id = pci_get_subdevice(dev); - - ent = igb_vendor_info_array; - while (ent->vendor_id != 0) { - if ((pci_vendor_id == ent->vendor_id) && - (pci_device_id == ent->device_id) && - - ((pci_subvendor_id == ent->subvendor_id) || - (ent->subvendor_id == 0)) && - - ((pci_subdevice_id == ent->subdevice_id) || - (ent->subdevice_id == 0))) { - sprintf(adapter_name, "%s, Version - %s", - igb_strings[ent->index], - igb_driver_version); - device_set_desc_copy(dev, adapter_name); - return (BUS_PROBE_DEFAULT); - } - ent++; - } - return (ENXIO); -} - -/********************************************************************* - * Device initialization routine - * - * The attach entry point is called when the driver is being loaded. - * This routine identifies the type of hardware, allocates all resources - * and initializes the hardware. 
- * - * return 0 on success, positive on failure - *********************************************************************/ - -static int -igb_attach(device_t dev) -{ - struct adapter *adapter; - int error = 0; - u16 eeprom_data; - - INIT_DEBUGOUT("igb_attach: begin"); - - if (resource_disabled("igb", device_get_unit(dev))) { - device_printf(dev, "Disabled by device hint\n"); - return (ENXIO); - } - - adapter = device_get_softc(dev); - adapter->dev = adapter->osdep.dev = dev; - IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev)); - - /* SYSCTLs */ - SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), - SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), - OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, - igb_sysctl_nvm_info, "I", "NVM Information"); - - igb_set_sysctl_value(adapter, "enable_aim", - "Interrupt Moderation", &adapter->enable_aim, - igb_enable_aim); - - SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), - SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), - OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, - adapter, 0, igb_set_flowcntl, "I", "Flow Control"); - - callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0); - - /* Determine hardware and mac info */ - igb_identify_hardware(adapter); - - /* Setup PCI resources */ - if (igb_allocate_pci_resources(adapter)) { - device_printf(dev, "Allocation of PCI resources failed\n"); - error = ENXIO; - goto err_pci; - } - - /* Do Shared Code initialization */ - if (e1000_setup_init_funcs(&adapter->hw, TRUE)) { - device_printf(dev, "Setup of Shared code failed\n"); - error = ENXIO; - goto err_pci; - } - - e1000_get_bus_info(&adapter->hw); - - /* Sysctls for limiting the amount of work done in the taskqueues */ - igb_set_sysctl_value(adapter, "rx_processing_limit", - "max number of rx packets to process", - &adapter->rx_process_limit, igb_rx_process_limit); - - igb_set_sysctl_value(adapter, "tx_processing_limit", - "max number of tx packets to process", - &adapter->tx_process_limit, igb_tx_process_limit); - - /* - * Validate number of 
transmit and receive descriptors. It - * must not exceed hardware maximum, and must be multiple - * of E1000_DBA_ALIGN. - */ - if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 || - (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) { - device_printf(dev, "Using %d TX descriptors instead of %d!\n", - IGB_DEFAULT_TXD, igb_txd); - adapter->num_tx_desc = IGB_DEFAULT_TXD; - } else - adapter->num_tx_desc = igb_txd; - if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 || - (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) { - device_printf(dev, "Using %d RX descriptors instead of %d!\n", - IGB_DEFAULT_RXD, igb_rxd); - adapter->num_rx_desc = IGB_DEFAULT_RXD; - } else - adapter->num_rx_desc = igb_rxd; - - adapter->hw.mac.autoneg = DO_AUTO_NEG; - adapter->hw.phy.autoneg_wait_to_complete = FALSE; - adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT; - - /* Copper options */ - if (adapter->hw.phy.media_type == e1000_media_type_copper) { - adapter->hw.phy.mdix = AUTO_ALL_MODES; - adapter->hw.phy.disable_polarity_correction = FALSE; - adapter->hw.phy.ms_type = IGB_MASTER_SLAVE; - } - - /* - * Set the frame limits assuming - * standard ethernet sized frames. - */ - adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE; - - /* - ** Allocate and Setup Queues - */ - if (igb_allocate_queues(adapter)) { - error = ENOMEM; - goto err_pci; - } - - /* Allocate the appropriate stats memory */ - if (adapter->vf_ifp) { - adapter->stats = - (struct e1000_vf_stats *)malloc(sizeof \ - (struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO); - igb_vf_init_stats(adapter); - } else - adapter->stats = - (struct e1000_hw_stats *)malloc(sizeof \ - (struct e1000_hw_stats), M_DEVBUF, M_NOWAIT | M_ZERO); - if (adapter->stats == NULL) { - device_printf(dev, "Can not allocate stats memory\n"); - error = ENOMEM; - goto err_late; - } - - /* Allocate multicast array memory. 
*/ - adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN * - MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT); - if (adapter->mta == NULL) { - device_printf(dev, "Can not allocate multicast setup array\n"); - error = ENOMEM; - goto err_late; - } - - /* Some adapter-specific advanced features */ - if (adapter->hw.mac.type >= e1000_i350) { - SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), - SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), - OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW, - adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce"); - SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), - SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), - OID_AUTO, "eee_disabled", CTLTYPE_INT|CTLFLAG_RW, - adapter, 0, igb_sysctl_eee, "I", - "Disable Energy Efficient Ethernet"); - if (adapter->hw.phy.media_type == e1000_media_type_copper) { - if (adapter->hw.mac.type == e1000_i354) - e1000_set_eee_i354(&adapter->hw, TRUE, TRUE); - else - e1000_set_eee_i350(&adapter->hw, TRUE, TRUE); - } - } - - /* - ** Start from a known state, this is - ** important in reading the nvm and - ** mac from that. - */ - e1000_reset_hw(&adapter->hw); - - /* Make sure we have a good EEPROM before we read from it */ - if (((adapter->hw.mac.type != e1000_i210) && - (adapter->hw.mac.type != e1000_i211)) && - (e1000_validate_nvm_checksum(&adapter->hw) < 0)) { - /* - ** Some PCI-E parts fail the first check due to - ** the link being in sleep state, call it again, - ** if it fails a second time its a real issue. 
- */ - if (e1000_validate_nvm_checksum(&adapter->hw) < 0) { - device_printf(dev, - "The EEPROM Checksum Is Not Valid\n"); - error = EIO; - goto err_late; - } - } - - /* - ** Copy the permanent MAC address out of the EEPROM - */ - if (e1000_read_mac_addr(&adapter->hw) < 0) { - device_printf(dev, "EEPROM read error while reading MAC" - " address\n"); - error = EIO; - goto err_late; - } - - /* Check its sanity */ - if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) { - if (adapter->vf_ifp) { - u8 addr[ETHER_ADDR_LEN]; - arc4rand(&addr, sizeof(addr), 0); - addr[0] &= 0xFE; - addr[0] |= 0x02; - bcopy(addr, adapter->hw.mac.addr, sizeof(addr)); - } else { - device_printf(dev, "Invalid MAC address\n"); - error = EIO; - goto err_late; - } - } - - /* Setup OS specific network interface */ - if (igb_setup_interface(dev, adapter) != 0) - goto err_late; - - /* Now get a good starting state */ - igb_reset(adapter); - - /* Initialize statistics */ - igb_update_stats_counters(adapter); - - adapter->hw.mac.get_link_status = 1; - igb_update_link_status(adapter); - - /* Indicate SOL/IDER usage */ - if (e1000_check_reset_block(&adapter->hw)) - device_printf(dev, - "PHY reset is blocked due to SOL/IDER session.\n"); - - /* Determine if we have to control management hardware */ - adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw); - - /* - * Setup Wake-on-Lan - */ - /* APME bit in EEPROM is mapped to WUC.APME */ - eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME; - if (eeprom_data) - adapter->wol = E1000_WUFC_MAG; - - /* Register for VLAN events */ - adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, - igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST); - adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, - igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST); - - igb_add_hw_stats(adapter); - - /* Tell the stack that the interface is not active */ - adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; - adapter->ifp->if_drv_flags |= 
IFF_DRV_OACTIVE; - - adapter->led_dev = led_create(igb_led_func, adapter, - device_get_nameunit(dev)); - - /* - ** Configure Interrupts - */ - if ((adapter->msix > 1) && (igb_enable_msix)) - error = igb_allocate_msix(adapter); - else /* MSI or Legacy */ - error = igb_allocate_legacy(adapter); - if (error) - goto err_late; - -#ifdef DEV_NETMAP - igb_netmap_attach(adapter); -#endif /* DEV_NETMAP */ - INIT_DEBUGOUT("igb_attach: end"); - - return (0); - -err_late: - if (igb_detach(dev) == 0) /* igb_detach() already did the cleanup */ - return(error); - igb_free_transmit_structures(adapter); - igb_free_receive_structures(adapter); - igb_release_hw_control(adapter); -err_pci: - igb_free_pci_resources(adapter); - if (adapter->ifp != NULL) - if_free(adapter->ifp); - free(adapter->mta, M_DEVBUF); - IGB_CORE_LOCK_DESTROY(adapter); - - return (error); -} - -/********************************************************************* - * Device removal routine - * - * The detach entry point is called when the driver is being removed. - * This routine stops the adapter and deallocates all the resources - * that were allocated for driver operation. 
- * - * return 0 on success, positive on failure - *********************************************************************/ - -static int -igb_detach(device_t dev) -{ - struct adapter *adapter = device_get_softc(dev); - struct ifnet *ifp = adapter->ifp; - - INIT_DEBUGOUT("igb_detach: begin"); - - /* Make sure VLANS are not using driver */ - if (adapter->ifp->if_vlantrunk != NULL) { - device_printf(dev,"Vlan in use, detach first\n"); - return (EBUSY); - } - - ether_ifdetach(adapter->ifp); - - if (adapter->led_dev != NULL) - led_destroy(adapter->led_dev); - -#ifdef DEVICE_POLLING - if (ifp->if_capenable & IFCAP_POLLING) - ether_poll_deregister(ifp); -#endif - - IGB_CORE_LOCK(adapter); - adapter->in_detach = 1; - igb_stop(adapter); - IGB_CORE_UNLOCK(adapter); - - e1000_phy_hw_reset(&adapter->hw); - - /* Give control back to firmware */ - igb_release_manageability(adapter); - igb_release_hw_control(adapter); - - if (adapter->wol) { - E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN); - E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol); - igb_enable_wakeup(dev); - } - - /* Unregister VLAN events */ - if (adapter->vlan_attach != NULL) - EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach); - if (adapter->vlan_detach != NULL) - EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach); - - callout_drain(&adapter->timer); - -#ifdef DEV_NETMAP - netmap_detach(adapter->ifp); -#endif /* DEV_NETMAP */ - igb_free_pci_resources(adapter); - bus_generic_detach(dev); - if_free(ifp); - - igb_free_transmit_structures(adapter); - igb_free_receive_structures(adapter); - if (adapter->mta != NULL) - free(adapter->mta, M_DEVBUF); - - IGB_CORE_LOCK_DESTROY(adapter); - - return (0); -} - -/********************************************************************* - * - * Shutdown entry point - * - **********************************************************************/ - -static int -igb_shutdown(device_t dev) -{ - return igb_suspend(dev); -} - -/* - * Suspend/resume device 
methods. - */ -static int -igb_suspend(device_t dev) -{ - struct adapter *adapter = device_get_softc(dev); - - IGB_CORE_LOCK(adapter); - - igb_stop(adapter); - - igb_release_manageability(adapter); - igb_release_hw_control(adapter); - - if (adapter->wol) { - E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN); - E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol); - igb_enable_wakeup(dev); - } - - IGB_CORE_UNLOCK(adapter); - - return bus_generic_suspend(dev); -} - -static int -igb_resume(device_t dev) -{ - struct adapter *adapter = device_get_softc(dev); - struct tx_ring *txr = adapter->tx_rings; - struct ifnet *ifp = adapter->ifp; - - IGB_CORE_LOCK(adapter); - igb_init_locked(adapter); - igb_init_manageability(adapter); - - if ((ifp->if_flags & IFF_UP) && - (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) { - for (int i = 0; i < adapter->num_queues; i++, txr++) { - IGB_TX_LOCK(txr); -#ifndef IGB_LEGACY_TX - /* Process the stack queue only if not depleted */ - if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) && - !drbr_empty(ifp, txr->br)) - igb_mq_start_locked(ifp, txr); -#else - if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) - igb_start_locked(txr, ifp); -#endif - IGB_TX_UNLOCK(txr); - } - } - IGB_CORE_UNLOCK(adapter); - - return bus_generic_resume(dev); -} - - -#ifdef IGB_LEGACY_TX - -/********************************************************************* - * Transmit entry point - * - * igb_start is called by the stack to initiate a transmit. - * The driver will remain in this routine as long as there are - * packets to transmit and transmit resources are available. - * In case resources are not available stack is notified and - * the packet is requeued. 
- **********************************************************************/ - -static void -igb_start_locked(struct tx_ring *txr, struct ifnet *ifp) -{ - struct adapter *adapter = ifp->if_softc; - struct mbuf *m_head; - - IGB_TX_LOCK_ASSERT(txr); - - if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != - IFF_DRV_RUNNING) - return; - if (!adapter->link_active) - return; - - /* Call cleanup if number of TX descriptors low */ - if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) - igb_txeof(txr); - - while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { - if (txr->tx_avail <= IGB_MAX_SCATTER) { - txr->queue_status |= IGB_QUEUE_DEPLETED; - break; - } - IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); - if (m_head == NULL) - break; - /* - * Encapsulation can modify our pointer, and or make it - * NULL on failure. In that event, we can't requeue. - */ - if (igb_xmit(txr, &m_head)) { - if (m_head != NULL) - IFQ_DRV_PREPEND(&ifp->if_snd, m_head); - if (txr->tx_avail <= IGB_MAX_SCATTER) - txr->queue_status |= IGB_QUEUE_DEPLETED; - break; - } - - /* Send a copy of the frame to the BPF listener */ - ETHER_BPF_MTAP(ifp, m_head); - - /* Set watchdog on */ - txr->watchdog_time = ticks; - txr->queue_status |= IGB_QUEUE_WORKING; - } -} - -/* - * Legacy TX driver routine, called from the - * stack, always uses tx[0], and spins for it. 
- * Should not be used with multiqueue tx - */ -static void -igb_start(struct ifnet *ifp) -{ - struct adapter *adapter = ifp->if_softc; - struct tx_ring *txr = adapter->tx_rings; - - if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - IGB_TX_LOCK(txr); - igb_start_locked(txr, ifp); - IGB_TX_UNLOCK(txr); - } - return; -} - -#else /* ~IGB_LEGACY_TX */ - -/* -** Multiqueue Transmit Entry: -** quick turnaround to the stack -** -*/ -static int -igb_mq_start(struct ifnet *ifp, struct mbuf *m) -{ - struct adapter *adapter = ifp->if_softc; - struct igb_queue *que; - struct tx_ring *txr; - int i, err = 0; -#ifdef RSS - uint32_t bucket_id; -#endif - - /* Which queue to use */ - /* - * When doing RSS, map it to the same outbound queue - * as the incoming flow would be mapped to. - * - * If everything is setup correctly, it should be the - * same bucket that the current CPU we're on is. - */ - if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) { -#ifdef RSS - if (rss_hash2bucket(m->m_pkthdr.flowid, - M_HASHTYPE_GET(m), &bucket_id) == 0) { - /* XXX TODO: spit out something if bucket_id > num_queues? 
*/ - i = bucket_id % adapter->num_queues; - } else { -#endif - i = m->m_pkthdr.flowid % adapter->num_queues; -#ifdef RSS - } -#endif - } else { - i = curcpu % adapter->num_queues; - } - txr = &adapter->tx_rings[i]; - que = &adapter->queues[i]; - - err = drbr_enqueue(ifp, txr->br, m); - if (err) - return (err); - if (IGB_TX_TRYLOCK(txr)) { - igb_mq_start_locked(ifp, txr); - IGB_TX_UNLOCK(txr); - } else - taskqueue_enqueue(que->tq, &txr->txq_task); - - return (0); -} - -static int -igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr) -{ - struct adapter *adapter = txr->adapter; - struct mbuf *next; - int err = 0, enq = 0; - - IGB_TX_LOCK_ASSERT(txr); - - if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) || - adapter->link_active == 0) - return (ENETDOWN); - - /* Process the queue */ - while ((next = drbr_peek(ifp, txr->br)) != NULL) { - if ((err = igb_xmit(txr, &next)) != 0) { - if (next == NULL) { - /* It was freed, move forward */ - drbr_advance(ifp, txr->br); - } else { - /* - * Still have one left, it may not be - * the same since the transmit function - * may have changed it. - */ - drbr_putback(ifp, txr->br, next); - } - break; - } - drbr_advance(ifp, txr->br); - enq++; - if (next->m_flags & M_MCAST && adapter->vf_ifp) - if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); - ETHER_BPF_MTAP(ifp, next); - if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) - break; - } - if (enq > 0) { - /* Set the watchdog */ - txr->queue_status |= IGB_QUEUE_WORKING; - txr->watchdog_time = ticks; - } - if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) - igb_txeof(txr); - if (txr->tx_avail <= IGB_MAX_SCATTER) - txr->queue_status |= IGB_QUEUE_DEPLETED; - return (err); -} - -/* - * Called from a taskqueue to drain queued transmit packets. 
- */ -static void -igb_deferred_mq_start(void *arg, int pending) -{ - struct tx_ring *txr = arg; - struct adapter *adapter = txr->adapter; - struct ifnet *ifp = adapter->ifp; - - IGB_TX_LOCK(txr); - if (!drbr_empty(ifp, txr->br)) - igb_mq_start_locked(ifp, txr); - IGB_TX_UNLOCK(txr); -} - -/* -** Flush all ring buffers -*/ -static void -igb_qflush(struct ifnet *ifp) -{ - struct adapter *adapter = ifp->if_softc; - struct tx_ring *txr = adapter->tx_rings; - struct mbuf *m; - - for (int i = 0; i < adapter->num_queues; i++, txr++) { - IGB_TX_LOCK(txr); - while ((m = buf_ring_dequeue_sc(txr->br)) != NULL) - m_freem(m); - IGB_TX_UNLOCK(txr); - } - if_qflush(ifp); -} -#endif /* ~IGB_LEGACY_TX */ - -/********************************************************************* - * Ioctl entry point - * - * igb_ioctl is called when the user wants to configure the - * interface. - * - * return 0 on success, positive on failure - **********************************************************************/ - -static int -igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data) -{ - struct adapter *adapter = ifp->if_softc; - struct ifreq *ifr = (struct ifreq *)data; -#if defined(INET) || defined(INET6) - struct ifaddr *ifa = (struct ifaddr *)data; -#endif - bool avoid_reset = FALSE; - int error = 0; - - if (adapter->in_detach) - return (error); - - switch (command) { - case SIOCSIFADDR: -#ifdef INET - if (ifa->ifa_addr->sa_family == AF_INET) - avoid_reset = TRUE; -#endif -#ifdef INET6 - if (ifa->ifa_addr->sa_family == AF_INET6) - avoid_reset = TRUE; -#endif - /* - ** Calling init results in link renegotiation, - ** so we avoid doing it when possible. 
- */ - if (avoid_reset) { - ifp->if_flags |= IFF_UP; - if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) - igb_init(adapter); -#ifdef INET - if (!(ifp->if_flags & IFF_NOARP)) - arp_ifinit(ifp, ifa); -#endif - } else - error = ether_ioctl(ifp, command, data); - break; - case SIOCSIFMTU: - { - int max_frame_size; - - IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)"); - - IGB_CORE_LOCK(adapter); - max_frame_size = 9234; - if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN - - ETHER_CRC_LEN) { - IGB_CORE_UNLOCK(adapter); - error = EINVAL; - break; - } - - ifp->if_mtu = ifr->ifr_mtu; - adapter->max_frame_size = - ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; - if (ifp->if_drv_flags & IFF_DRV_RUNNING) - igb_init_locked(adapter); - IGB_CORE_UNLOCK(adapter); - break; - } - case SIOCSIFFLAGS: - IOCTL_DEBUGOUT("ioctl rcv'd:\ - SIOCSIFFLAGS (Set Interface Flags)"); - IGB_CORE_LOCK(adapter); - if (ifp->if_flags & IFF_UP) { - if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) { - if ((ifp->if_flags ^ adapter->if_flags) & - (IFF_PROMISC | IFF_ALLMULTI)) { - igb_disable_promisc(adapter); - igb_set_promisc(adapter); - } - } else - igb_init_locked(adapter); - } else - if (ifp->if_drv_flags & IFF_DRV_RUNNING) - igb_stop(adapter); - adapter->if_flags = ifp->if_flags; - IGB_CORE_UNLOCK(adapter); - break; - case SIOCADDMULTI: - case SIOCDELMULTI: - IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI"); - if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - IGB_CORE_LOCK(adapter); - igb_disable_intr(adapter); - igb_set_multi(adapter); -#ifdef DEVICE_POLLING - if (!(ifp->if_capenable & IFCAP_POLLING)) -#endif - igb_enable_intr(adapter); - IGB_CORE_UNLOCK(adapter); - } - break; - case SIOCSIFMEDIA: - /* Check SOL/IDER usage */ - IGB_CORE_LOCK(adapter); - if (e1000_check_reset_block(&adapter->hw)) { - IGB_CORE_UNLOCK(adapter); - device_printf(adapter->dev, "Media change is" - " blocked due to SOL/IDER session.\n"); - break; - } - IGB_CORE_UNLOCK(adapter); - case SIOCGIFMEDIA: - IOCTL_DEBUGOUT("ioctl 
rcv'd: \ - SIOCxIFMEDIA (Get/Set Interface Media)"); - error = ifmedia_ioctl(ifp, ifr, &adapter->media, command); - break; - case SIOCSIFCAP: - { - int mask, reinit; - - IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)"); - reinit = 0; - mask = ifr->ifr_reqcap ^ ifp->if_capenable; -#ifdef DEVICE_POLLING - if (mask & IFCAP_POLLING) { - if (ifr->ifr_reqcap & IFCAP_POLLING) { - error = ether_poll_register(igb_poll, ifp); - if (error) - return (error); - IGB_CORE_LOCK(adapter); - igb_disable_intr(adapter); - ifp->if_capenable |= IFCAP_POLLING; - IGB_CORE_UNLOCK(adapter); - } else { - error = ether_poll_deregister(ifp); - /* Enable interrupt even in error case */ - IGB_CORE_LOCK(adapter); - igb_enable_intr(adapter); - ifp->if_capenable &= ~IFCAP_POLLING; - IGB_CORE_UNLOCK(adapter); - } - } -#endif -#if __FreeBSD_version >= 1000000 - /* HW cannot turn these on/off separately */ - if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) { - ifp->if_capenable ^= IFCAP_RXCSUM; - ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; - reinit = 1; - } - if (mask & IFCAP_TXCSUM) { - ifp->if_capenable ^= IFCAP_TXCSUM; - reinit = 1; - } - if (mask & IFCAP_TXCSUM_IPV6) { - ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; - reinit = 1; - } -#else - if (mask & IFCAP_HWCSUM) { - ifp->if_capenable ^= IFCAP_HWCSUM; - reinit = 1; - } -#endif - if (mask & IFCAP_TSO4) { - ifp->if_capenable ^= IFCAP_TSO4; - reinit = 1; - } - if (mask & IFCAP_TSO6) { - ifp->if_capenable ^= IFCAP_TSO6; - reinit = 1; - } - if (mask & IFCAP_VLAN_HWTAGGING) { - ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; - reinit = 1; - } - if (mask & IFCAP_VLAN_HWFILTER) { - ifp->if_capenable ^= IFCAP_VLAN_HWFILTER; - reinit = 1; - } - if (mask & IFCAP_VLAN_HWTSO) { - ifp->if_capenable ^= IFCAP_VLAN_HWTSO; - reinit = 1; - } - if (mask & IFCAP_LRO) { - ifp->if_capenable ^= IFCAP_LRO; - reinit = 1; - } - if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING)) - igb_init(adapter); - VLAN_CAPABILITIES(ifp); - break; - } - - default: - error = 
ether_ioctl(ifp, command, data); - break; - } - - return (error); -} - - -/********************************************************************* - * Init entry point - * - * This routine is used in two ways. It is used by the stack as - * init entry point in network interface structure. It is also used - * by the driver as a hw/sw initialization routine to get to a - * consistent state. - * - * return 0 on success, positive on failure - **********************************************************************/ - -static void -igb_init_locked(struct adapter *adapter) -{ - struct ifnet *ifp = adapter->ifp; - device_t dev = adapter->dev; - - INIT_DEBUGOUT("igb_init: begin"); - - IGB_CORE_LOCK_ASSERT(adapter); - - igb_disable_intr(adapter); - callout_stop(&adapter->timer); - - /* Get the latest mac address, User can use a LAA */ - bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr, - ETHER_ADDR_LEN); - - /* Put the address into the Receive Address Array */ - e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0); - - igb_reset(adapter); - igb_update_link_status(adapter); - - E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN); - - /* Set hardware offload abilities */ - ifp->if_hwassist = 0; - if (ifp->if_capenable & IFCAP_TXCSUM) { -#if __FreeBSD_version >= 1000000 - ifp->if_hwassist |= (CSUM_IP_TCP | CSUM_IP_UDP); - if (adapter->hw.mac.type != e1000_82575) - ifp->if_hwassist |= CSUM_IP_SCTP; -#else - ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); -#if __FreeBSD_version >= 800000 - if (adapter->hw.mac.type != e1000_82575) - ifp->if_hwassist |= CSUM_SCTP; -#endif -#endif - } - -#if __FreeBSD_version >= 1000000 - if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) { - ifp->if_hwassist |= (CSUM_IP6_TCP | CSUM_IP6_UDP); - if (adapter->hw.mac.type != e1000_82575) - ifp->if_hwassist |= CSUM_IP6_SCTP; - } -#endif - if (ifp->if_capenable & IFCAP_TSO) - ifp->if_hwassist |= CSUM_TSO; - - /* Clear bad data from Rx FIFOs */ - e1000_rx_fifo_flush_82575(&adapter->hw); - - /* Configure for OS 
presence */ - igb_init_manageability(adapter); - - /* Prepare transmit descriptors and buffers */ - igb_setup_transmit_structures(adapter); - igb_initialize_transmit_units(adapter); - - /* Setup Multicast table */ - igb_set_multi(adapter); - - /* - ** Figure out the desired mbuf pool - ** for doing jumbo/packetsplit - */ - if (adapter->max_frame_size <= 2048) - adapter->rx_mbuf_sz = MCLBYTES; - else if (adapter->max_frame_size <= 4096) - adapter->rx_mbuf_sz = MJUMPAGESIZE; - else - adapter->rx_mbuf_sz = MJUM9BYTES; - - /* Prepare receive descriptors and buffers */ - if (igb_setup_receive_structures(adapter)) { - device_printf(dev, "Could not setup receive structures\n"); - return; - } - igb_initialize_receive_units(adapter); - - /* Enable VLAN support */ - if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) - igb_setup_vlan_hw_support(adapter); - - /* Don't lose promiscuous settings */ - igb_set_promisc(adapter); - - ifp->if_drv_flags |= IFF_DRV_RUNNING; - ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; - - callout_reset(&adapter->timer, hz, igb_local_timer, adapter); - e1000_clear_hw_cntrs_base_generic(&adapter->hw); - - if (adapter->msix > 1) /* Set up queue routing */ - igb_configure_queues(adapter); - - /* this clears any pending interrupts */ - E1000_READ_REG(&adapter->hw, E1000_ICR); -#ifdef DEVICE_POLLING - /* - * Only enable interrupts if we are not polling, make sure - * they are off otherwise. 
- */ - if (ifp->if_capenable & IFCAP_POLLING) - igb_disable_intr(adapter); - else -#endif /* DEVICE_POLLING */ - { - igb_enable_intr(adapter); - E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC); - } - - /* Set Energy Efficient Ethernet */ - if (adapter->hw.phy.media_type == e1000_media_type_copper) { - if (adapter->hw.mac.type == e1000_i354) - e1000_set_eee_i354(&adapter->hw, TRUE, TRUE); - else - e1000_set_eee_i350(&adapter->hw, TRUE, TRUE); - } -} - -static void -igb_init(void *arg) -{ - struct adapter *adapter = arg; - - IGB_CORE_LOCK(adapter); - igb_init_locked(adapter); - IGB_CORE_UNLOCK(adapter); -} - - -static void -igb_handle_que(void *context, int pending) -{ - struct igb_queue *que = context; - struct adapter *adapter = que->adapter; - struct tx_ring *txr = que->txr; - struct ifnet *ifp = adapter->ifp; - - if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - bool more; - - more = igb_rxeof(que, adapter->rx_process_limit, NULL); - - IGB_TX_LOCK(txr); - igb_txeof(txr); -#ifndef IGB_LEGACY_TX - /* Process the stack queue only if not depleted */ - if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) && - !drbr_empty(ifp, txr->br)) - igb_mq_start_locked(ifp, txr); -#else - if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) - igb_start_locked(txr, ifp); -#endif - IGB_TX_UNLOCK(txr); - /* Do we need another? 
*/ - if (more) { - taskqueue_enqueue(que->tq, &que->que_task); - return; - } - } - -#ifdef DEVICE_POLLING - if (ifp->if_capenable & IFCAP_POLLING) - return; -#endif - /* Reenable this interrupt */ - if (que->eims) - E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims); - else - igb_enable_intr(adapter); -} - -/* Deal with link in a sleepable context */ -static void -igb_handle_link(void *context, int pending) -{ - struct adapter *adapter = context; - - IGB_CORE_LOCK(adapter); - igb_handle_link_locked(adapter); - IGB_CORE_UNLOCK(adapter); -} - -static void -igb_handle_link_locked(struct adapter *adapter) -{ - struct tx_ring *txr = adapter->tx_rings; - struct ifnet *ifp = adapter->ifp; - - IGB_CORE_LOCK_ASSERT(adapter); - adapter->hw.mac.get_link_status = 1; - igb_update_link_status(adapter); - if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) { - for (int i = 0; i < adapter->num_queues; i++, txr++) { - IGB_TX_LOCK(txr); -#ifndef IGB_LEGACY_TX - /* Process the stack queue only if not depleted */ - if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) && - !drbr_empty(ifp, txr->br)) - igb_mq_start_locked(ifp, txr); -#else - if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) - igb_start_locked(txr, ifp); -#endif - IGB_TX_UNLOCK(txr); - } - } -} - -/********************************************************************* - * - * MSI/Legacy Deferred - * Interrupt Service routine - * - *********************************************************************/ -static int -igb_irq_fast(void *arg) -{ - struct adapter *adapter = arg; - struct igb_queue *que = adapter->queues; - u32 reg_icr; - - - reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); - - /* Hot eject? */ - if (reg_icr == 0xffffffff) - return FILTER_STRAY; - - /* Definitely not our interrupt. */ - if (reg_icr == 0x0) - return FILTER_STRAY; - - if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0) - return FILTER_STRAY; - - /* - * Mask interrupts until the taskqueue is finished running. 
This is - * cheap, just assume that it is needed. This also works around the - * MSI message reordering errata on certain systems. - */ - igb_disable_intr(adapter); - taskqueue_enqueue(que->tq, &que->que_task); - - /* Link status change */ - if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) - taskqueue_enqueue(que->tq, &adapter->link_task); - - if (reg_icr & E1000_ICR_RXO) - adapter->rx_overruns++; - return FILTER_HANDLED; -} - -#ifdef DEVICE_POLLING -#if __FreeBSD_version >= 800000 -#define POLL_RETURN_COUNT(a) (a) -static int -#else -#define POLL_RETURN_COUNT(a) -static void -#endif -igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count) -{ - struct adapter *adapter = ifp->if_softc; - struct igb_queue *que; - struct tx_ring *txr; - u32 reg_icr, rx_done = 0; - u32 loop = IGB_MAX_LOOP; - bool more; - - IGB_CORE_LOCK(adapter); - if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { - IGB_CORE_UNLOCK(adapter); - return POLL_RETURN_COUNT(rx_done); - } - - if (cmd == POLL_AND_CHECK_STATUS) { - reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); - /* Link status change */ - if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) - igb_handle_link_locked(adapter); - - if (reg_icr & E1000_ICR_RXO) - adapter->rx_overruns++; - } - IGB_CORE_UNLOCK(adapter); - - for (int i = 0; i < adapter->num_queues; i++) { - que = &adapter->queues[i]; - txr = que->txr; - - igb_rxeof(que, count, &rx_done); - - IGB_TX_LOCK(txr); - do { - more = igb_txeof(txr); - } while (loop-- && more); -#ifndef IGB_LEGACY_TX - if (!drbr_empty(ifp, txr->br)) - igb_mq_start_locked(ifp, txr); -#else - if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) - igb_start_locked(txr, ifp); -#endif - IGB_TX_UNLOCK(txr); - } - - return POLL_RETURN_COUNT(rx_done); -} -#endif /* DEVICE_POLLING */ - -/********************************************************************* - * - * MSIX Que Interrupt Service routine - * - **********************************************************************/ -static void -igb_msix_que(void *arg) -{ - struct 
igb_queue *que = arg; - struct adapter *adapter = que->adapter; - struct ifnet *ifp = adapter->ifp; - struct tx_ring *txr = que->txr; - struct rx_ring *rxr = que->rxr; - u32 newitr = 0; - bool more_rx; - - /* Ignore spurious interrupts */ - if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) - return; - - E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims); - ++que->irqs; - - IGB_TX_LOCK(txr); - igb_txeof(txr); -#ifndef IGB_LEGACY_TX - /* Process the stack queue only if not depleted */ - if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) && - !drbr_empty(ifp, txr->br)) - igb_mq_start_locked(ifp, txr); -#else - if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) - igb_start_locked(txr, ifp); -#endif - IGB_TX_UNLOCK(txr); - - more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL); - - if (adapter->enable_aim == FALSE) - goto no_calc; - /* - ** Do Adaptive Interrupt Moderation: - ** - Write out last calculated setting - ** - Calculate based on average size over - ** the last interval. - */ - if (que->eitr_setting) - E1000_WRITE_REG(&adapter->hw, - E1000_EITR(que->msix), que->eitr_setting); - - que->eitr_setting = 0; - - /* Idle, do nothing */ - if ((txr->bytes == 0) && (rxr->bytes == 0)) - goto no_calc; - - /* Used half Default if sub-gig */ - if (adapter->link_speed != 1000) - newitr = IGB_DEFAULT_ITR / 2; - else { - if ((txr->bytes) && (txr->packets)) - newitr = txr->bytes/txr->packets; - if ((rxr->bytes) && (rxr->packets)) - newitr = max(newitr, - (rxr->bytes / rxr->packets)); - newitr += 24; /* account for hardware frame, crc */ - /* set an upper boundary */ - newitr = min(newitr, 3000); - /* Be nice to the mid range */ - if ((newitr > 300) && (newitr < 1200)) - newitr = (newitr / 3); - else - newitr = (newitr / 2); - } - newitr &= 0x7FFC; /* Mask invalid bits */ - if (adapter->hw.mac.type == e1000_82575) - newitr |= newitr << 16; - else - newitr |= E1000_EITR_CNT_IGNR; - - /* save for next interrupt */ - que->eitr_setting = newitr; - - /* Reset state */ - txr->bytes = 0; - 
txr->packets = 0; - rxr->bytes = 0; - rxr->packets = 0; - -no_calc: - /* Schedule a clean task if needed*/ - if (more_rx) - taskqueue_enqueue(que->tq, &que->que_task); - else - /* Reenable this interrupt */ - E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims); - return; -} - - -/********************************************************************* - * - * MSIX Link Interrupt Service routine - * - **********************************************************************/ - -static void -igb_msix_link(void *arg) -{ - struct adapter *adapter = arg; - u32 icr; - - ++adapter->link_irq; - icr = E1000_READ_REG(&adapter->hw, E1000_ICR); - if (!(icr & E1000_ICR_LSC)) - goto spurious; - igb_handle_link(adapter, 0); - -spurious: - /* Rearm */ - E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC); - E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask); - return; -} - - -/********************************************************************* - * - * Media Ioctl callback - * - * This routine is called whenever the user queries the status of - * the interface using ifconfig. 
- * - **********************************************************************/ -static void -igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) -{ - struct adapter *adapter = ifp->if_softc; - - INIT_DEBUGOUT("igb_media_status: begin"); - - IGB_CORE_LOCK(adapter); - igb_update_link_status(adapter); - - ifmr->ifm_status = IFM_AVALID; - ifmr->ifm_active = IFM_ETHER; - - if (!adapter->link_active) { - IGB_CORE_UNLOCK(adapter); - return; - } - - ifmr->ifm_status |= IFM_ACTIVE; - - switch (adapter->link_speed) { - case 10: - ifmr->ifm_active |= IFM_10_T; - break; - case 100: - /* - ** Support for 100Mb SFP - these are Fiber - ** but the media type appears as serdes - */ - if (adapter->hw.phy.media_type == - e1000_media_type_internal_serdes) - ifmr->ifm_active |= IFM_100_FX; - else - ifmr->ifm_active |= IFM_100_TX; - break; - case 1000: - ifmr->ifm_active |= IFM_1000_T; - break; - case 2500: - ifmr->ifm_active |= IFM_2500_SX; - break; - } - - if (adapter->link_duplex == FULL_DUPLEX) - ifmr->ifm_active |= IFM_FDX; - else - ifmr->ifm_active |= IFM_HDX; - - IGB_CORE_UNLOCK(adapter); -} - -/********************************************************************* - * - * Media Ioctl callback - * - * This routine is called when the user changes speed/duplex using - * media/mediopt option with ifconfig. 
- * - **********************************************************************/ -static int -igb_media_change(struct ifnet *ifp) -{ - struct adapter *adapter = ifp->if_softc; - struct ifmedia *ifm = &adapter->media; - - INIT_DEBUGOUT("igb_media_change: begin"); - - if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) - return (EINVAL); - - IGB_CORE_LOCK(adapter); - switch (IFM_SUBTYPE(ifm->ifm_media)) { - case IFM_AUTO: - adapter->hw.mac.autoneg = DO_AUTO_NEG; - adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT; - break; - case IFM_1000_LX: - case IFM_1000_SX: - case IFM_1000_T: - adapter->hw.mac.autoneg = DO_AUTO_NEG; - adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL; - break; - case IFM_100_TX: - adapter->hw.mac.autoneg = FALSE; - adapter->hw.phy.autoneg_advertised = 0; - if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) - adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL; - else - adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF; - break; - case IFM_10_T: - adapter->hw.mac.autoneg = FALSE; - adapter->hw.phy.autoneg_advertised = 0; - if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) - adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL; - else - adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF; - break; - default: - device_printf(adapter->dev, "Unsupported media type\n"); - } - - igb_init_locked(adapter); - IGB_CORE_UNLOCK(adapter); - - return (0); -} - - -/********************************************************************* - * - * This routine maps the mbufs to Advanced TX descriptors. 
- * - **********************************************************************/ -static int -igb_xmit(struct tx_ring *txr, struct mbuf **m_headp) -{ - struct adapter *adapter = txr->adapter; - u32 olinfo_status = 0, cmd_type_len; - int i, j, error, nsegs; - int first; - bool remap = TRUE; - struct mbuf *m_head; - bus_dma_segment_t segs[IGB_MAX_SCATTER]; - bus_dmamap_t map; - struct igb_tx_buf *txbuf; - union e1000_adv_tx_desc *txd = NULL; - - m_head = *m_headp; - - /* Basic descriptor defines */ - cmd_type_len = (E1000_ADVTXD_DTYP_DATA | - E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT); - - if (m_head->m_flags & M_VLANTAG) - cmd_type_len |= E1000_ADVTXD_DCMD_VLE; - - /* - * Important to capture the first descriptor - * used because it will contain the index of - * the one we tell the hardware to report back - */ - first = txr->next_avail_desc; - txbuf = &txr->tx_buffers[first]; - map = txbuf->map; - - /* - * Map the packet for DMA. - */ -retry: - error = bus_dmamap_load_mbuf_sg(txr->txtag, map, - *m_headp, segs, &nsegs, BUS_DMA_NOWAIT); - - if (__predict_false(error)) { - struct mbuf *m; - - switch (error) { - case EFBIG: - /* Try it again? 
- one try */ - if (remap == TRUE) { - remap = FALSE; - m = m_collapse(*m_headp, M_NOWAIT, - IGB_MAX_SCATTER); - if (m == NULL) { - adapter->mbuf_defrag_failed++; - m_freem(*m_headp); - *m_headp = NULL; - return (ENOBUFS); - } - *m_headp = m; - goto retry; - } else - return (error); - default: - txr->no_tx_dma_setup++; - m_freem(*m_headp); - *m_headp = NULL; - return (error); - } - } - - /* Make certain there are enough descriptors */ - if (txr->tx_avail < (nsegs + 2)) { - txr->no_desc_avail++; - bus_dmamap_unload(txr->txtag, map); - return (ENOBUFS); - } - m_head = *m_headp; - - /* - ** Set up the appropriate offload context - ** this will consume the first descriptor - */ - error = igb_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status); - if (__predict_false(error)) { - m_freem(*m_headp); - *m_headp = NULL; - return (error); - } - - /* 82575 needs the queue index added */ - if (adapter->hw.mac.type == e1000_82575) - olinfo_status |= txr->me << 4; - - i = txr->next_avail_desc; - for (j = 0; j < nsegs; j++) { - bus_size_t seglen; - bus_addr_t segaddr; - - txbuf = &txr->tx_buffers[i]; - txd = &txr->tx_base[i]; - seglen = segs[j].ds_len; - segaddr = htole64(segs[j].ds_addr); - - txd->read.buffer_addr = segaddr; - txd->read.cmd_type_len = htole32(E1000_TXD_CMD_IFCS | - cmd_type_len | seglen); - txd->read.olinfo_status = htole32(olinfo_status); - - if (++i == txr->num_desc) - i = 0; - } - - txd->read.cmd_type_len |= - htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS); - txr->tx_avail -= nsegs; - txr->next_avail_desc = i; - - txbuf->m_head = m_head; - /* - ** Here we swap the map so the last descriptor, - ** which gets the completion interrupt has the - ** real map, and the first descriptor gets the - ** unused map from this descriptor. 
- */ - txr->tx_buffers[first].map = txbuf->map; - txbuf->map = map; - bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE); - - /* Set the EOP descriptor that will be marked done */ - txbuf = &txr->tx_buffers[first]; - txbuf->eop = txd; - - bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - /* - * Advance the Transmit Descriptor Tail (Tdt), this tells the - * hardware that this frame is available to transmit. - */ - ++txr->total_packets; - E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i); - - return (0); -} -static void -igb_set_promisc(struct adapter *adapter) -{ - struct ifnet *ifp = adapter->ifp; - struct e1000_hw *hw = &adapter->hw; - u32 reg; - - if (adapter->vf_ifp) { - e1000_promisc_set_vf(hw, e1000_promisc_enabled); - return; - } - - reg = E1000_READ_REG(hw, E1000_RCTL); - if (ifp->if_flags & IFF_PROMISC) { - reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE); - E1000_WRITE_REG(hw, E1000_RCTL, reg); - } else if (ifp->if_flags & IFF_ALLMULTI) { - reg |= E1000_RCTL_MPE; - reg &= ~E1000_RCTL_UPE; - E1000_WRITE_REG(hw, E1000_RCTL, reg); - } -} - -static void -igb_disable_promisc(struct adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - struct ifnet *ifp = adapter->ifp; - u32 reg; - int mcnt = 0; - - if (adapter->vf_ifp) { - e1000_promisc_set_vf(hw, e1000_promisc_disabled); - return; - } - reg = E1000_READ_REG(hw, E1000_RCTL); - reg &= (~E1000_RCTL_UPE); - if (ifp->if_flags & IFF_ALLMULTI) - mcnt = MAX_NUM_MULTICAST_ADDRESSES; - else { - struct ifmultiaddr *ifma; -#if __FreeBSD_version < 800000 - IF_ADDR_LOCK(ifp); -#else - if_maddr_rlock(ifp); -#endif - TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { - if (ifma->ifma_addr->sa_family != AF_LINK) - continue; - if (mcnt == MAX_NUM_MULTICAST_ADDRESSES) - break; - mcnt++; - } -#if __FreeBSD_version < 800000 - IF_ADDR_UNLOCK(ifp); -#else - if_maddr_runlock(ifp); -#endif - } - /* Don't disable if in MAX groups */ - if (mcnt < 
MAX_NUM_MULTICAST_ADDRESSES) - reg &= (~E1000_RCTL_MPE); - E1000_WRITE_REG(hw, E1000_RCTL, reg); -} - - -/********************************************************************* - * Multicast Update - * - * This routine is called whenever multicast address list is updated. - * - **********************************************************************/ - -static void -igb_set_multi(struct adapter *adapter) -{ - struct ifnet *ifp = adapter->ifp; - struct ifmultiaddr *ifma; - u32 reg_rctl = 0; - u8 *mta; - - int mcnt = 0; - - IOCTL_DEBUGOUT("igb_set_multi: begin"); - - mta = adapter->mta; - bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN * - MAX_NUM_MULTICAST_ADDRESSES); - -#if __FreeBSD_version < 800000 - IF_ADDR_LOCK(ifp); -#else - if_maddr_rlock(ifp); -#endif - TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { - if (ifma->ifma_addr->sa_family != AF_LINK) - continue; - - if (mcnt == MAX_NUM_MULTICAST_ADDRESSES) - break; - - bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), - &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN); - mcnt++; - } -#if __FreeBSD_version < 800000 - IF_ADDR_UNLOCK(ifp); -#else - if_maddr_runlock(ifp); -#endif - - if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) { - reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); - reg_rctl |= E1000_RCTL_MPE; - E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); - } else - e1000_update_mc_addr_list(&adapter->hw, mta, mcnt); -} - - -/********************************************************************* - * Timer routine: - * This routine checks for link status, - * updates statistics, and does the watchdog. 
- * - **********************************************************************/ - -static void -igb_local_timer(void *arg) -{ - struct adapter *adapter = arg; - device_t dev = adapter->dev; - struct ifnet *ifp = adapter->ifp; - struct tx_ring *txr = adapter->tx_rings; - struct igb_queue *que = adapter->queues; - int hung = 0, busy = 0; - - - IGB_CORE_LOCK_ASSERT(adapter); - - igb_update_link_status(adapter); - igb_update_stats_counters(adapter); - - /* - ** Check the TX queues status - ** - central locked handling of OACTIVE - ** - watchdog only if all queues show hung - */ - for (int i = 0; i < adapter->num_queues; i++, que++, txr++) { - if ((txr->queue_status & IGB_QUEUE_HUNG) && - (adapter->pause_frames == 0)) - ++hung; - if (txr->queue_status & IGB_QUEUE_DEPLETED) - ++busy; - if ((txr->queue_status & IGB_QUEUE_IDLE) == 0) - taskqueue_enqueue(que->tq, &que->que_task); - } - if (hung == adapter->num_queues) - goto timeout; - if (busy == adapter->num_queues) - ifp->if_drv_flags |= IFF_DRV_OACTIVE; - else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) && - (busy < adapter->num_queues)) - ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; - - adapter->pause_frames = 0; - callout_reset(&adapter->timer, hz, igb_local_timer, adapter); -#ifndef DEVICE_POLLING - /* Schedule all queue interrupts - deadlock protection */ - E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask); -#endif - return; - -timeout: - device_printf(adapter->dev, "Watchdog timeout -- resetting\n"); - device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me, - E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)), - E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me))); - device_printf(dev,"TX(%d) desc avail = %d," - "Next TX to Clean = %d\n", - txr->me, txr->tx_avail, txr->next_to_clean); - adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; - adapter->watchdog_events++; - igb_init_locked(adapter); -} - -static void -igb_update_link_status(struct adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - struct 
e1000_fc_info *fc = &hw->fc; - struct ifnet *ifp = adapter->ifp; - device_t dev = adapter->dev; - struct tx_ring *txr = adapter->tx_rings; - u32 link_check, thstat, ctrl; - char *flowctl = NULL; - - link_check = thstat = ctrl = 0; - - /* Get the cached link value or read for real */ - switch (hw->phy.media_type) { - case e1000_media_type_copper: - if (hw->mac.get_link_status) { - /* Do the work to read phy */ - e1000_check_for_link(hw); - link_check = !hw->mac.get_link_status; - } else - link_check = TRUE; - break; - case e1000_media_type_fiber: - e1000_check_for_link(hw); - link_check = (E1000_READ_REG(hw, E1000_STATUS) & - E1000_STATUS_LU); - break; - case e1000_media_type_internal_serdes: - e1000_check_for_link(hw); - link_check = adapter->hw.mac.serdes_has_link; - break; - /* VF device is type_unknown */ - case e1000_media_type_unknown: - e1000_check_for_link(hw); - link_check = !hw->mac.get_link_status; - /* Fall thru */ - default: - break; - } - - /* Check for thermal downshift or shutdown */ - if (hw->mac.type == e1000_i350) { - thstat = E1000_READ_REG(hw, E1000_THSTAT); - ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT); - } - - /* Get the flow control for display */ - switch (fc->current_mode) { - case e1000_fc_rx_pause: - flowctl = "RX"; - break; - case e1000_fc_tx_pause: - flowctl = "TX"; - break; - case e1000_fc_full: - flowctl = "Full"; - break; - case e1000_fc_none: - default: - flowctl = "None"; - break; - } - - /* Now we check if a transition has happened */ - if (link_check && (adapter->link_active == 0)) { - e1000_get_speed_and_duplex(&adapter->hw, - &adapter->link_speed, &adapter->link_duplex); - if (bootverbose) - device_printf(dev, "Link is up %d Mbps %s," - " Flow Control: %s\n", - adapter->link_speed, - ((adapter->link_duplex == FULL_DUPLEX) ? 
- "Full Duplex" : "Half Duplex"), flowctl); - adapter->link_active = 1; - ifp->if_baudrate = adapter->link_speed * 1000000; - if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) && - (thstat & E1000_THSTAT_LINK_THROTTLE)) - device_printf(dev, "Link: thermal downshift\n"); - /* Delay Link Up for Phy update */ - if (((hw->mac.type == e1000_i210) || - (hw->mac.type == e1000_i211)) && - (hw->phy.id == I210_I_PHY_ID)) - msec_delay(I210_LINK_DELAY); - /* Reset if the media type changed. */ - if (hw->dev_spec._82575.media_changed) { - hw->dev_spec._82575.media_changed = false; - adapter->flags |= IGB_MEDIA_RESET; - igb_reset(adapter); - } - /* This can sleep */ - if_link_state_change(ifp, LINK_STATE_UP); - } else if (!link_check && (adapter->link_active == 1)) { - ifp->if_baudrate = adapter->link_speed = 0; - adapter->link_duplex = 0; - if (bootverbose) - device_printf(dev, "Link is Down\n"); - if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) && - (thstat & E1000_THSTAT_PWR_DOWN)) - device_printf(dev, "Link: thermal shutdown\n"); - adapter->link_active = 0; - /* This can sleep */ - if_link_state_change(ifp, LINK_STATE_DOWN); - /* Reset queue state */ - for (int i = 0; i < adapter->num_queues; i++, txr++) - txr->queue_status = IGB_QUEUE_IDLE; - } -} - -/********************************************************************* - * - * This routine disables all traffic on the adapter by issuing a - * global reset on the MAC and deallocates TX/RX buffers. - * - **********************************************************************/ - -static void -igb_stop(void *arg) -{ - struct adapter *adapter = arg; - struct ifnet *ifp = adapter->ifp; - struct tx_ring *txr = adapter->tx_rings; - - IGB_CORE_LOCK_ASSERT(adapter); - - INIT_DEBUGOUT("igb_stop: begin"); - - igb_disable_intr(adapter); - - callout_stop(&adapter->timer); - - /* Tell the stack that the interface is no longer active */ - ifp->if_drv_flags &= ~IFF_DRV_RUNNING; - ifp->if_drv_flags |= IFF_DRV_OACTIVE; - - /* Disarm watchdog timer. 
*/ - for (int i = 0; i < adapter->num_queues; i++, txr++) { - IGB_TX_LOCK(txr); - txr->queue_status = IGB_QUEUE_IDLE; - IGB_TX_UNLOCK(txr); - } - - e1000_reset_hw(&adapter->hw); - E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0); - - e1000_led_off(&adapter->hw); - e1000_cleanup_led(&adapter->hw); -} - - -/********************************************************************* - * - * Determine hardware revision. - * - **********************************************************************/ -static void -igb_identify_hardware(struct adapter *adapter) -{ - device_t dev = adapter->dev; - - /* Make sure our PCI config space has the necessary stuff set */ - pci_enable_busmaster(dev); - adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2); - - /* Save off the information about this board */ - adapter->hw.vendor_id = pci_get_vendor(dev); - adapter->hw.device_id = pci_get_device(dev); - adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1); - adapter->hw.subsystem_vendor_id = - pci_read_config(dev, PCIR_SUBVEND_0, 2); - adapter->hw.subsystem_device_id = - pci_read_config(dev, PCIR_SUBDEV_0, 2); - - /* Set MAC type early for PCI setup */ - e1000_set_mac_type(&adapter->hw); - - /* Are we a VF device? 
*/ - if ((adapter->hw.mac.type == e1000_vfadapt) || - (adapter->hw.mac.type == e1000_vfadapt_i350)) - adapter->vf_ifp = 1; - else - adapter->vf_ifp = 0; -} - -static int -igb_allocate_pci_resources(struct adapter *adapter) -{ - device_t dev = adapter->dev; - int rid; - - rid = PCIR_BAR(0); - adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, - &rid, RF_ACTIVE); - if (adapter->pci_mem == NULL) { - device_printf(dev, "Unable to allocate bus resource: memory\n"); - return (ENXIO); - } - adapter->osdep.mem_bus_space_tag = - rman_get_bustag(adapter->pci_mem); - adapter->osdep.mem_bus_space_handle = - rman_get_bushandle(adapter->pci_mem); - adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle; - - adapter->num_queues = 1; /* Defaults for Legacy or MSI */ - - /* This will setup either MSI/X or MSI */ - adapter->msix = igb_setup_msix(adapter); - adapter->hw.back = &adapter->osdep; - - return (0); -} - -/********************************************************************* - * - * Setup the Legacy or MSI Interrupt handler - * - **********************************************************************/ -static int -igb_allocate_legacy(struct adapter *adapter) -{ - device_t dev = adapter->dev; - struct igb_queue *que = adapter->queues; -#ifndef IGB_LEGACY_TX - struct tx_ring *txr = adapter->tx_rings; -#endif - int error, rid = 0; - - /* Turn off all interrupts */ - E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff); - - /* MSI RID is 1 */ - if (adapter->msix == 1) - rid = 1; - - /* We allocate a single interrupt resource */ - adapter->res = bus_alloc_resource_any(dev, - SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); - if (adapter->res == NULL) { - device_printf(dev, "Unable to allocate bus resource: " - "interrupt\n"); - return (ENXIO); - } - -#ifndef IGB_LEGACY_TX - TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr); -#endif - - /* - * Try allocating a fast interrupt and the associated deferred - * processing contexts. 
- */ - TASK_INIT(&que->que_task, 0, igb_handle_que, que); - /* Make tasklet for deferred link handling */ - TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter); - que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT, - taskqueue_thread_enqueue, &que->tq); - taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq", - device_get_nameunit(adapter->dev)); - if ((error = bus_setup_intr(dev, adapter->res, - INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL, - adapter, &adapter->tag)) != 0) { - device_printf(dev, "Failed to register fast interrupt " - "handler: %d\n", error); - taskqueue_free(que->tq); - que->tq = NULL; - return (error); - } - - return (0); -} - - -/********************************************************************* - * - * Setup the MSIX Queue Interrupt handlers: - * - **********************************************************************/ -static int -igb_allocate_msix(struct adapter *adapter) -{ - device_t dev = adapter->dev; - struct igb_queue *que = adapter->queues; - int error, rid, vector = 0; - int cpu_id = 0; -#ifdef RSS - cpuset_t cpu_mask; -#endif - - /* Be sure to start with all interrupts disabled */ - E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0); - E1000_WRITE_FLUSH(&adapter->hw); - -#ifdef RSS - /* - * If we're doing RSS, the number of queues needs to - * match the number of RSS buckets that are configured. - * - * + If there's more queues than RSS buckets, we'll end - * up with queues that get no traffic. - * - * + If there's more RSS buckets than queues, we'll end - * up having multiple RSS buckets map to the same queue, - * so there'll be some contention. 
- */ - if (adapter->num_queues != rss_getnumbuckets()) { - device_printf(dev, - "%s: number of queues (%d) != number of RSS buckets (%d)" - "; performance will be impacted.\n", - __func__, - adapter->num_queues, - rss_getnumbuckets()); - } -#endif - - for (int i = 0; i < adapter->num_queues; i++, vector++, que++) { - rid = vector +1; - que->res = bus_alloc_resource_any(dev, - SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); - if (que->res == NULL) { - device_printf(dev, - "Unable to allocate bus resource: " - "MSIX Queue Interrupt\n"); - return (ENXIO); - } - error = bus_setup_intr(dev, que->res, - INTR_TYPE_NET | INTR_MPSAFE, NULL, - igb_msix_que, que, &que->tag); - if (error) { - que->res = NULL; - device_printf(dev, "Failed to register Queue handler"); - return (error); - } -#if __FreeBSD_version >= 800504 - bus_describe_intr(dev, que->res, que->tag, "que %d", i); -#endif - que->msix = vector; - if (adapter->hw.mac.type == e1000_82575) - que->eims = E1000_EICR_TX_QUEUE0 << i; - else - que->eims = 1 << vector; - -#ifdef RSS - /* - * The queue ID is used as the RSS layer bucket ID. - * We look up the queue ID -> RSS CPU ID and select - * that. - */ - cpu_id = rss_getcpu(i % rss_getnumbuckets()); -#else - /* - * Bind the msix vector, and thus the - * rings to the corresponding cpu. - * - * This just happens to match the default RSS round-robin - * bucket -> queue -> CPU allocation. 
- */ - if (adapter->num_queues > 1) { - if (igb_last_bind_cpu < 0) - igb_last_bind_cpu = CPU_FIRST(); - cpu_id = igb_last_bind_cpu; - } -#endif - - if (adapter->num_queues > 1) { - bus_bind_intr(dev, que->res, cpu_id); -#ifdef RSS - device_printf(dev, - "Bound queue %d to RSS bucket %d\n", - i, cpu_id); -#else - device_printf(dev, - "Bound queue %d to cpu %d\n", - i, cpu_id); -#endif - } - -#ifndef IGB_LEGACY_TX - TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start, - que->txr); -#endif - /* Make tasklet for deferred handling */ - TASK_INIT(&que->que_task, 0, igb_handle_que, que); - que->tq = taskqueue_create("igb_que", M_NOWAIT, - taskqueue_thread_enqueue, &que->tq); - if (adapter->num_queues > 1) { - /* - * Only pin the taskqueue thread to a CPU if - * RSS is in use. - * - * This again just happens to match the default RSS - * round-robin bucket -> queue -> CPU allocation. - */ -#ifdef RSS - CPU_SETOF(cpu_id, &cpu_mask); - taskqueue_start_threads_cpuset(&que->tq, 1, PI_NET, - &cpu_mask, - "%s que (bucket %d)", - device_get_nameunit(adapter->dev), - cpu_id); -#else - taskqueue_start_threads(&que->tq, 1, PI_NET, - "%s que (qid %d)", - device_get_nameunit(adapter->dev), - cpu_id); -#endif - } else { - taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que", - device_get_nameunit(adapter->dev)); - } - - /* Finally update the last bound CPU id */ - if (adapter->num_queues > 1) - igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu); - } - - /* And Link */ - rid = vector + 1; - adapter->res = bus_alloc_resource_any(dev, - SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); - if (adapter->res == NULL) { - device_printf(dev, - "Unable to allocate bus resource: " - "MSIX Link Interrupt\n"); - return (ENXIO); - } - if ((error = bus_setup_intr(dev, adapter->res, - INTR_TYPE_NET | INTR_MPSAFE, NULL, - igb_msix_link, adapter, &adapter->tag)) != 0) { - device_printf(dev, "Failed to register Link handler"); - return (error); - } -#if __FreeBSD_version >= 800504 - bus_describe_intr(dev, 
adapter->res, adapter->tag, "link"); -#endif - adapter->linkvec = vector; - - return (0); -} - - -static void -igb_configure_queues(struct adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - struct igb_queue *que; - u32 tmp, ivar = 0, newitr = 0; - - /* First turn on RSS capability */ - if (adapter->hw.mac.type != e1000_82575) - E1000_WRITE_REG(hw, E1000_GPIE, - E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME | - E1000_GPIE_PBA | E1000_GPIE_NSICR); - - /* Turn on MSIX */ - switch (adapter->hw.mac.type) { - case e1000_82580: - case e1000_i350: - case e1000_i354: - case e1000_i210: - case e1000_i211: - case e1000_vfadapt: - case e1000_vfadapt_i350: - /* RX entries */ - for (int i = 0; i < adapter->num_queues; i++) { - u32 index = i >> 1; - ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); - que = &adapter->queues[i]; - if (i & 1) { - ivar &= 0xFF00FFFF; - ivar |= (que->msix | E1000_IVAR_VALID) << 16; - } else { - ivar &= 0xFFFFFF00; - ivar |= que->msix | E1000_IVAR_VALID; - } - E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); - } - /* TX entries */ - for (int i = 0; i < adapter->num_queues; i++) { - u32 index = i >> 1; - ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); - que = &adapter->queues[i]; - if (i & 1) { - ivar &= 0x00FFFFFF; - ivar |= (que->msix | E1000_IVAR_VALID) << 24; - } else { - ivar &= 0xFFFF00FF; - ivar |= (que->msix | E1000_IVAR_VALID) << 8; - } - E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); - adapter->que_mask |= que->eims; - } - - /* And for the link interrupt */ - ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8; - adapter->link_mask = 1 << adapter->linkvec; - E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar); - break; - case e1000_82576: - /* RX entries */ - for (int i = 0; i < adapter->num_queues; i++) { - u32 index = i & 0x7; /* Each IVAR has two entries */ - ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); - que = &adapter->queues[i]; - if (i < 8) { - ivar &= 0xFFFFFF00; - ivar |= que->msix | E1000_IVAR_VALID; - } else { - 
ivar &= 0xFF00FFFF; - ivar |= (que->msix | E1000_IVAR_VALID) << 16; - } - E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); - adapter->que_mask |= que->eims; - } - /* TX entries */ - for (int i = 0; i < adapter->num_queues; i++) { - u32 index = i & 0x7; /* Each IVAR has two entries */ - ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); - que = &adapter->queues[i]; - if (i < 8) { - ivar &= 0xFFFF00FF; - ivar |= (que->msix | E1000_IVAR_VALID) << 8; - } else { - ivar &= 0x00FFFFFF; - ivar |= (que->msix | E1000_IVAR_VALID) << 24; - } - E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); - adapter->que_mask |= que->eims; - } - - /* And for the link interrupt */ - ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8; - adapter->link_mask = 1 << adapter->linkvec; - E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar); - break; - - case e1000_82575: - /* enable MSI-X support*/ - tmp = E1000_READ_REG(hw, E1000_CTRL_EXT); - tmp |= E1000_CTRL_EXT_PBA_CLR; - /* Auto-Mask interrupts upon ICR read. */ - tmp |= E1000_CTRL_EXT_EIAME; - tmp |= E1000_CTRL_EXT_IRCA; - E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp); - - /* Queues */ - for (int i = 0; i < adapter->num_queues; i++) { - que = &adapter->queues[i]; - tmp = E1000_EICR_RX_QUEUE0 << i; - tmp |= E1000_EICR_TX_QUEUE0 << i; - que->eims = tmp; - E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0), - i, que->eims); - adapter->que_mask |= que->eims; - } - - /* Link */ - E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec), - E1000_EIMS_OTHER); - adapter->link_mask |= E1000_EIMS_OTHER; - default: - break; - } - - /* Set the starting interrupt rate */ - if (igb_max_interrupt_rate > 0) - newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC; - - if (hw->mac.type == e1000_82575) - newitr |= newitr << 16; - else - newitr |= E1000_EITR_CNT_IGNR; - - for (int i = 0; i < adapter->num_queues; i++) { - que = &adapter->queues[i]; - E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr); - } - - return; -} - - -static void -igb_free_pci_resources(struct adapter *adapter) 
-{ - struct igb_queue *que = adapter->queues; - device_t dev = adapter->dev; - int rid; - - /* - ** There is a slight possibility of a failure mode - ** in attach that will result in entering this function - ** before interrupt resources have been initialized, and - ** in that case we do not want to execute the loops below - ** We can detect this reliably by the state of the adapter - ** res pointer. - */ - if (adapter->res == NULL) - goto mem; - - /* - * First release all the interrupt resources: - */ - for (int i = 0; i < adapter->num_queues; i++, que++) { - rid = que->msix + 1; - if (que->tag != NULL) { - bus_teardown_intr(dev, que->res, que->tag); - que->tag = NULL; - } - if (que->res != NULL) - bus_release_resource(dev, - SYS_RES_IRQ, rid, que->res); - } - - /* Clean the Legacy or Link interrupt last */ - if (adapter->linkvec) /* we are doing MSIX */ - rid = adapter->linkvec + 1; - else - (adapter->msix != 0) ? (rid = 1):(rid = 0); - - que = adapter->queues; - if (adapter->tag != NULL) { - taskqueue_drain(que->tq, &adapter->link_task); - bus_teardown_intr(dev, adapter->res, adapter->tag); - adapter->tag = NULL; - } - if (adapter->res != NULL) - bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res); - - for (int i = 0; i < adapter->num_queues; i++, que++) { - if (que->tq != NULL) { -#ifndef IGB_LEGACY_TX - taskqueue_drain(que->tq, &que->txr->txq_task); -#endif - taskqueue_drain(que->tq, &que->que_task); - taskqueue_free(que->tq); - } - } -mem: - if (adapter->msix) - pci_release_msi(dev); - - if (adapter->msix_mem != NULL) - bus_release_resource(dev, SYS_RES_MEMORY, - adapter->memrid, adapter->msix_mem); - - if (adapter->pci_mem != NULL) - bus_release_resource(dev, SYS_RES_MEMORY, - PCIR_BAR(0), adapter->pci_mem); - -} - -/* - * Setup Either MSI/X or MSI - */ -static int -igb_setup_msix(struct adapter *adapter) -{ - device_t dev = adapter->dev; - int bar, want, queues, msgs, maxqueues; - - /* tuneable override */ - if (igb_enable_msix == 0) - goto msi; - - 
/* First try MSI/X */ - msgs = pci_msix_count(dev); - if (msgs == 0) - goto msi; - /* - ** Some new devices, as with ixgbe, now may - ** use a different BAR, so we need to keep - ** track of which is used. - */ - adapter->memrid = PCIR_BAR(IGB_MSIX_BAR); - bar = pci_read_config(dev, adapter->memrid, 4); - if (bar == 0) /* use next bar */ - adapter->memrid += 4; - adapter->msix_mem = bus_alloc_resource_any(dev, - SYS_RES_MEMORY, &adapter->memrid, RF_ACTIVE); - if (adapter->msix_mem == NULL) { - /* May not be enabled */ - device_printf(adapter->dev, - "Unable to map MSIX table \n"); - goto msi; - } - - queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus; - - /* Override via tuneable */ - if (igb_num_queues != 0) - queues = igb_num_queues; - -#ifdef RSS - /* If we're doing RSS, clamp at the number of RSS buckets */ - if (queues > rss_getnumbuckets()) - queues = rss_getnumbuckets(); -#endif - - - /* Sanity check based on HW */ - switch (adapter->hw.mac.type) { - case e1000_82575: - maxqueues = 4; - break; - case e1000_82576: - case e1000_82580: - case e1000_i350: - case e1000_i354: - maxqueues = 8; - break; - case e1000_i210: - maxqueues = 4; - break; - case e1000_i211: - maxqueues = 2; - break; - default: /* VF interfaces */ - maxqueues = 1; - break; - } - - /* Final clamp on the actual hardware capability */ - if (queues > maxqueues) - queues = maxqueues; - - /* - ** One vector (RX/TX pair) per queue - ** plus an additional for Link interrupt - */ - want = queues + 1; - if (msgs >= want) - msgs = want; - else { - device_printf(adapter->dev, - "MSIX Configuration Problem, " - "%d vectors configured, but %d queues wanted!\n", - msgs, want); - goto msi; - } - if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) { - device_printf(adapter->dev, - "Using MSIX interrupts with %d vectors\n", msgs); - adapter->num_queues = queues; - return (msgs); - } - /* - ** If MSIX alloc failed or provided us with - ** less than needed, free and fall through to MSI - */ - 
pci_release_msi(dev); - -msi: - if (adapter->msix_mem != NULL) { - bus_release_resource(dev, SYS_RES_MEMORY, - PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem); - adapter->msix_mem = NULL; - } - msgs = 1; - if (pci_alloc_msi(dev, &msgs) == 0) { - device_printf(adapter->dev," Using an MSI interrupt\n"); - return (msgs); - } - device_printf(adapter->dev," Using a Legacy interrupt\n"); - return (0); -} - -/********************************************************************* - * - * Initialize the DMA Coalescing feature - * - **********************************************************************/ -static void -igb_init_dmac(struct adapter *adapter, u32 pba) -{ - device_t dev = adapter->dev; - struct e1000_hw *hw = &adapter->hw; - u32 dmac, reg = ~E1000_DMACR_DMAC_EN; - u16 hwm; - - if (hw->mac.type == e1000_i211) - return; - - if (hw->mac.type > e1000_82580) { - - if (adapter->dmac == 0) { /* Disabling it */ - E1000_WRITE_REG(hw, E1000_DMACR, reg); - return; - } else - device_printf(dev, "DMA Coalescing enabled\n"); - - /* Set starting threshold */ - E1000_WRITE_REG(hw, E1000_DMCTXTH, 0); - - hwm = 64 * pba - adapter->max_frame_size / 16; - if (hwm < 64 * (pba - 6)) - hwm = 64 * (pba - 6); - reg = E1000_READ_REG(hw, E1000_FCRTC); - reg &= ~E1000_FCRTC_RTH_COAL_MASK; - reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT) - & E1000_FCRTC_RTH_COAL_MASK); - E1000_WRITE_REG(hw, E1000_FCRTC, reg); - - - dmac = pba - adapter->max_frame_size / 512; - if (dmac < pba - 10) - dmac = pba - 10; - reg = E1000_READ_REG(hw, E1000_DMACR); - reg &= ~E1000_DMACR_DMACTHR_MASK; - reg = ((dmac << E1000_DMACR_DMACTHR_SHIFT) - & E1000_DMACR_DMACTHR_MASK); - - /* transition to L0x or L1 if available..*/ - reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK); - - /* Check if status is 2.5Gb backplane connection - * before configuration of watchdog timer, which is - * in msec values in 12.8usec intervals - * watchdog timer= msec values in 32usec intervals - * for non 2.5Gb connection - */ - if 
(hw->mac.type == e1000_i354) { - int status = E1000_READ_REG(hw, E1000_STATUS); - if ((status & E1000_STATUS_2P5_SKU) && - (!(status & E1000_STATUS_2P5_SKU_OVER))) - reg |= ((adapter->dmac * 5) >> 6); - else - reg |= (adapter->dmac >> 5); - } else { - reg |= (adapter->dmac >> 5); - } - - E1000_WRITE_REG(hw, E1000_DMACR, reg); - - E1000_WRITE_REG(hw, E1000_DMCRTRH, 0); - - /* Set the interval before transition */ - reg = E1000_READ_REG(hw, E1000_DMCTLX); - if (hw->mac.type == e1000_i350) - reg |= IGB_DMCTLX_DCFLUSH_DIS; - /* - ** in 2.5Gb connection, TTLX unit is 0.4 usec - ** which is 0x4*2 = 0xA. But delay is still 4 usec - */ - if (hw->mac.type == e1000_i354) { - int status = E1000_READ_REG(hw, E1000_STATUS); - if ((status & E1000_STATUS_2P5_SKU) && - (!(status & E1000_STATUS_2P5_SKU_OVER))) - reg |= 0xA; - else - reg |= 0x4; - } else { - reg |= 0x4; - } - - E1000_WRITE_REG(hw, E1000_DMCTLX, reg); - - /* free space in tx packet buffer to wake from DMA coal */ - E1000_WRITE_REG(hw, E1000_DMCTXTH, (IGB_TXPBSIZE - - (2 * adapter->max_frame_size)) >> 6); - - /* make low power state decision controlled by DMA coal */ - reg = E1000_READ_REG(hw, E1000_PCIEMISC); - reg &= ~E1000_PCIEMISC_LX_DECISION; - E1000_WRITE_REG(hw, E1000_PCIEMISC, reg); - - } else if (hw->mac.type == e1000_82580) { - u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC); - E1000_WRITE_REG(hw, E1000_PCIEMISC, - reg & ~E1000_PCIEMISC_LX_DECISION); - E1000_WRITE_REG(hw, E1000_DMACR, 0); - } -} - - -/********************************************************************* - * - * Set up an fresh starting state - * - **********************************************************************/ -static void -igb_reset(struct adapter *adapter) -{ - device_t dev = adapter->dev; - struct e1000_hw *hw = &adapter->hw; - struct e1000_fc_info *fc = &hw->fc; - struct ifnet *ifp = adapter->ifp; - u32 pba = 0; - u16 hwm; - - INIT_DEBUGOUT("igb_reset: begin"); - - /* Let the firmware know the OS is in control */ - 
igb_get_hw_control(adapter); - - /* - * Packet Buffer Allocation (PBA) - * Writing PBA sets the receive portion of the buffer - * the remainder is used for the transmit buffer. - */ - switch (hw->mac.type) { - case e1000_82575: - pba = E1000_PBA_32K; - break; - case e1000_82576: - case e1000_vfadapt: - pba = E1000_READ_REG(hw, E1000_RXPBS); - pba &= E1000_RXPBS_SIZE_MASK_82576; - break; - case e1000_82580: - case e1000_i350: - case e1000_i354: - case e1000_vfadapt_i350: - pba = E1000_READ_REG(hw, E1000_RXPBS); - pba = e1000_rxpbs_adjust_82580(pba); - break; - case e1000_i210: - case e1000_i211: - pba = E1000_PBA_34K; - default: - break; - } - - /* Special needs in case of Jumbo frames */ - if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) { - u32 tx_space, min_tx, min_rx; - pba = E1000_READ_REG(hw, E1000_PBA); - tx_space = pba >> 16; - pba &= 0xffff; - min_tx = (adapter->max_frame_size + - sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2; - min_tx = roundup2(min_tx, 1024); - min_tx >>= 10; - min_rx = adapter->max_frame_size; - min_rx = roundup2(min_rx, 1024); - min_rx >>= 10; - if (tx_space < min_tx && - ((min_tx - tx_space) < pba)) { - pba = pba - (min_tx - tx_space); - /* - * if short on rx space, rx wins - * and must trump tx adjustment - */ - if (pba < min_rx) - pba = min_rx; - } - E1000_WRITE_REG(hw, E1000_PBA, pba); - } - - INIT_DEBUGOUT1("igb_init: pba=%dK",pba); - - /* - * These parameters control the automatic generation (Tx) and - * response (Rx) to Ethernet PAUSE frames. - * - High water mark should allow for at least two frames to be - * received after sending an XOFF. - * - Low water mark works best when it is very near the high water mark. - * This allows the receiver to restart by sending XON when it has - * drained a bit. 
- */ - hwm = min(((pba << 10) * 9 / 10), - ((pba << 10) - 2 * adapter->max_frame_size)); - - if (hw->mac.type < e1000_82576) { - fc->high_water = hwm & 0xFFF8; /* 8-byte granularity */ - fc->low_water = fc->high_water - 8; - } else { - fc->high_water = hwm & 0xFFF0; /* 16-byte granularity */ - fc->low_water = fc->high_water - 16; - } - - fc->pause_time = IGB_FC_PAUSE_TIME; - fc->send_xon = TRUE; - if (adapter->fc) - fc->requested_mode = adapter->fc; - else - fc->requested_mode = e1000_fc_default; - - /* Issue a global reset */ - e1000_reset_hw(hw); - E1000_WRITE_REG(hw, E1000_WUC, 0); - - /* Reset for AutoMediaDetect */ - if (adapter->flags & IGB_MEDIA_RESET) { - e1000_setup_init_funcs(hw, TRUE); - e1000_get_bus_info(hw); - adapter->flags &= ~IGB_MEDIA_RESET; - } - - if (e1000_init_hw(hw) < 0) - device_printf(dev, "Hardware Initialization Failed\n"); - - /* Setup DMA Coalescing */ - igb_init_dmac(adapter, pba); - - E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN); - e1000_get_phy_info(hw); - e1000_check_for_link(hw); - return; -} - -/********************************************************************* - * - * Setup networking device structure and register an interface. 
- * - **********************************************************************/ -static int -igb_setup_interface(device_t dev, struct adapter *adapter) -{ - struct ifnet *ifp; - - INIT_DEBUGOUT("igb_setup_interface: begin"); - - ifp = adapter->ifp = if_alloc(IFT_ETHER); - if (ifp == NULL) { - device_printf(dev, "can not allocate ifnet structure\n"); - return (-1); - } - if_initname(ifp, device_get_name(dev), device_get_unit(dev)); - ifp->if_init = igb_init; - ifp->if_softc = adapter; - ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; - ifp->if_ioctl = igb_ioctl; - ifp->if_get_counter = igb_get_counter; - - /* TSO parameters */ - ifp->if_hw_tsomax = IP_MAXPACKET; - ifp->if_hw_tsomaxsegcount = IGB_MAX_SCATTER; - ifp->if_hw_tsomaxsegsize = IGB_TSO_SEG_SIZE; - -#ifndef IGB_LEGACY_TX - ifp->if_transmit = igb_mq_start; - ifp->if_qflush = igb_qflush; -#else - ifp->if_start = igb_start; - IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1); - ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1; - IFQ_SET_READY(&ifp->if_snd); -#endif - - ether_ifattach(ifp, adapter->hw.mac.addr); - - ifp->if_capabilities = ifp->if_capenable = 0; - - ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM; -#if __FreeBSD_version >= 1000000 - ifp->if_capabilities |= IFCAP_HWCSUM_IPV6; -#endif - ifp->if_capabilities |= IFCAP_TSO; - ifp->if_capabilities |= IFCAP_JUMBO_MTU; - ifp->if_capenable = ifp->if_capabilities; - - /* Don't enable LRO by default */ - ifp->if_capabilities |= IFCAP_LRO; - -#ifdef DEVICE_POLLING - ifp->if_capabilities |= IFCAP_POLLING; -#endif - - /* - * Tell the upper layer(s) we - * support full VLAN capability. - */ - ifp->if_hdrlen = sizeof(struct ether_vlan_header); - ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING - | IFCAP_VLAN_HWTSO - | IFCAP_VLAN_MTU; - ifp->if_capenable |= IFCAP_VLAN_HWTAGGING - | IFCAP_VLAN_HWTSO - | IFCAP_VLAN_MTU; - - /* - ** Don't turn this on by default, if vlans are - ** created on another pseudo device (eg. 
lagg) - ** then vlan events are not passed thru, breaking - ** operation, but with HW FILTER off it works. If - ** using vlans directly on the igb driver you can - ** enable this and get full hardware tag filtering. - */ - ifp->if_capabilities |= IFCAP_VLAN_HWFILTER; - - /* - * Specify the media types supported by this adapter and register - * callbacks to update media and link information - */ - ifmedia_init(&adapter->media, IFM_IMASK, - igb_media_change, igb_media_status); - if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || - (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) { - ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX, - 0, NULL); - ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL); - } else { - ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL); - ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX, - 0, NULL); - ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX, - 0, NULL); - ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX, - 0, NULL); - if (adapter->hw.phy.type != e1000_phy_ife) { - ifmedia_add(&adapter->media, - IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); - ifmedia_add(&adapter->media, - IFM_ETHER | IFM_1000_T, 0, NULL); - } - } - ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); - ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO); - return (0); -} - - -/* - * Manage DMA'able memory. 
- */ -static void -igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) -{ - if (error) - return; - *(bus_addr_t *) arg = segs[0].ds_addr; -} - -static int -igb_dma_malloc(struct adapter *adapter, bus_size_t size, - struct igb_dma_alloc *dma, int mapflags) -{ - int error; - - error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */ - IGB_DBA_ALIGN, 0, /* alignment, bounds */ - BUS_SPACE_MAXADDR, /* lowaddr */ - BUS_SPACE_MAXADDR, /* highaddr */ - NULL, NULL, /* filter, filterarg */ - size, /* maxsize */ - 1, /* nsegments */ - size, /* maxsegsize */ - 0, /* flags */ - NULL, /* lockfunc */ - NULL, /* lockarg */ - &dma->dma_tag); - if (error) { - device_printf(adapter->dev, - "%s: bus_dma_tag_create failed: %d\n", - __func__, error); - goto fail_0; - } - - error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr, - BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map); - if (error) { - device_printf(adapter->dev, - "%s: bus_dmamem_alloc(%ju) failed: %d\n", - __func__, (uintmax_t)size, error); - goto fail_2; - } - - dma->dma_paddr = 0; - error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr, - size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT); - if (error || dma->dma_paddr == 0) { - device_printf(adapter->dev, - "%s: bus_dmamap_load failed: %d\n", - __func__, error); - goto fail_3; - } - - return (0); - -fail_3: - bus_dmamap_unload(dma->dma_tag, dma->dma_map); -fail_2: - bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); - bus_dma_tag_destroy(dma->dma_tag); -fail_0: - dma->dma_tag = NULL; - - return (error); -} - -static void -igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma) -{ - if (dma->dma_tag == NULL) - return; - if (dma->dma_paddr != 0) { - bus_dmamap_sync(dma->dma_tag, dma->dma_map, - BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); - bus_dmamap_unload(dma->dma_tag, dma->dma_map); - dma->dma_paddr = 0; - } - if (dma->dma_vaddr != NULL) { - bus_dmamem_free(dma->dma_tag, 
dma->dma_vaddr, dma->dma_map); - dma->dma_vaddr = NULL; - } - bus_dma_tag_destroy(dma->dma_tag); - dma->dma_tag = NULL; -} - - -/********************************************************************* - * - * Allocate memory for the transmit and receive rings, and then - * the descriptors associated with each, called only once at attach. - * - **********************************************************************/ -static int -igb_allocate_queues(struct adapter *adapter) -{ - device_t dev = adapter->dev; - struct igb_queue *que = NULL; - struct tx_ring *txr = NULL; - struct rx_ring *rxr = NULL; - int rsize, tsize, error = E1000_SUCCESS; - int txconf = 0, rxconf = 0; - - /* First allocate the top level queue structs */ - if (!(adapter->queues = - (struct igb_queue *) malloc(sizeof(struct igb_queue) * - adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { - device_printf(dev, "Unable to allocate queue memory\n"); - error = ENOMEM; - goto fail; - } - - /* Next allocate the TX ring struct memory */ - if (!(adapter->tx_rings = - (struct tx_ring *) malloc(sizeof(struct tx_ring) * - adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { - device_printf(dev, "Unable to allocate TX ring memory\n"); - error = ENOMEM; - goto tx_fail; - } - - /* Now allocate the RX */ - if (!(adapter->rx_rings = - (struct rx_ring *) malloc(sizeof(struct rx_ring) * - adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { - device_printf(dev, "Unable to allocate RX ring memory\n"); - error = ENOMEM; - goto rx_fail; - } - - tsize = roundup2(adapter->num_tx_desc * - sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN); - /* - * Now set up the TX queues, txconf is needed to handle the - * possibility that things fail midcourse and we need to - * undo memory gracefully - */ - for (int i = 0; i < adapter->num_queues; i++, txconf++) { - /* Set up some basics */ - txr = &adapter->tx_rings[i]; - txr->adapter = adapter; - txr->me = i; - txr->num_desc = adapter->num_tx_desc; - - /* Initialize the TX lock */ - 
snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)", - device_get_nameunit(dev), txr->me); - mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF); - - if (igb_dma_malloc(adapter, tsize, - &txr->txdma, BUS_DMA_NOWAIT)) { - device_printf(dev, - "Unable to allocate TX Descriptor memory\n"); - error = ENOMEM; - goto err_tx_desc; - } - txr->tx_base = (union e1000_adv_tx_desc *)txr->txdma.dma_vaddr; - bzero((void *)txr->tx_base, tsize); - - /* Now allocate transmit buffers for the ring */ - if (igb_allocate_transmit_buffers(txr)) { - device_printf(dev, - "Critical Failure setting up transmit buffers\n"); - error = ENOMEM; - goto err_tx_desc; - } -#ifndef IGB_LEGACY_TX - /* Allocate a buf ring */ - txr->br = buf_ring_alloc(igb_buf_ring_size, M_DEVBUF, - M_WAITOK, &txr->tx_mtx); -#endif - } - - /* - * Next the RX queues... - */ - rsize = roundup2(adapter->num_rx_desc * - sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN); - for (int i = 0; i < adapter->num_queues; i++, rxconf++) { - rxr = &adapter->rx_rings[i]; - rxr->adapter = adapter; - rxr->me = i; - - /* Initialize the RX lock */ - snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)", - device_get_nameunit(dev), txr->me); - mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF); - - if (igb_dma_malloc(adapter, rsize, - &rxr->rxdma, BUS_DMA_NOWAIT)) { - device_printf(dev, - "Unable to allocate RxDescriptor memory\n"); - error = ENOMEM; - goto err_rx_desc; - } - rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr; - bzero((void *)rxr->rx_base, rsize); - - /* Allocate receive buffers for the ring*/ - if (igb_allocate_receive_buffers(rxr)) { - device_printf(dev, - "Critical Failure setting up receive buffers\n"); - error = ENOMEM; - goto err_rx_desc; - } - } - - /* - ** Finally set up the queue holding structs - */ - for (int i = 0; i < adapter->num_queues; i++) { - que = &adapter->queues[i]; - que->adapter = adapter; - que->txr = &adapter->tx_rings[i]; - que->rxr = &adapter->rx_rings[i]; - } - - return 
(0); - -err_rx_desc: - for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--) - igb_dma_free(adapter, &rxr->rxdma); -err_tx_desc: - for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--) - igb_dma_free(adapter, &txr->txdma); - free(adapter->rx_rings, M_DEVBUF); -rx_fail: -#ifndef IGB_LEGACY_TX - buf_ring_free(txr->br, M_DEVBUF); -#endif - free(adapter->tx_rings, M_DEVBUF); -tx_fail: - free(adapter->queues, M_DEVBUF); -fail: - return (error); -} - -/********************************************************************* - * - * Allocate memory for tx_buffer structures. The tx_buffer stores all - * the information needed to transmit a packet on the wire. This is - * called only once at attach, setup is done every reset. - * - **********************************************************************/ -static int -igb_allocate_transmit_buffers(struct tx_ring *txr) -{ - struct adapter *adapter = txr->adapter; - device_t dev = adapter->dev; - struct igb_tx_buf *txbuf; - int error, i; - - /* - * Setup DMA descriptor areas. 
- */ - if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), - 1, 0, /* alignment, bounds */ - BUS_SPACE_MAXADDR, /* lowaddr */ - BUS_SPACE_MAXADDR, /* highaddr */ - NULL, NULL, /* filter, filterarg */ - IGB_TSO_SIZE, /* maxsize */ - IGB_MAX_SCATTER, /* nsegments */ - PAGE_SIZE, /* maxsegsize */ - 0, /* flags */ - NULL, /* lockfunc */ - NULL, /* lockfuncarg */ - &txr->txtag))) { - device_printf(dev,"Unable to allocate TX DMA tag\n"); - goto fail; - } - - if (!(txr->tx_buffers = - (struct igb_tx_buf *) malloc(sizeof(struct igb_tx_buf) * - adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) { - device_printf(dev, "Unable to allocate tx_buffer memory\n"); - error = ENOMEM; - goto fail; - } - - /* Create the descriptor buffer dma maps */ - txbuf = txr->tx_buffers; - for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) { - error = bus_dmamap_create(txr->txtag, 0, &txbuf->map); - if (error != 0) { - device_printf(dev, "Unable to create TX DMA map\n"); - goto fail; - } - } - - return 0; -fail: - /* We free all, it handles case where we are in the middle */ - igb_free_transmit_structures(adapter); - return (error); -} - -/********************************************************************* - * - * Initialize a transmit ring. - * - **********************************************************************/ -static void -igb_setup_transmit_ring(struct tx_ring *txr) -{ - struct adapter *adapter = txr->adapter; - struct igb_tx_buf *txbuf; - int i; -#ifdef DEV_NETMAP - struct netmap_adapter *na = NA(adapter->ifp); - struct netmap_slot *slot; -#endif /* DEV_NETMAP */ - - /* Clear the old descriptor contents */ - IGB_TX_LOCK(txr); -#ifdef DEV_NETMAP - slot = netmap_reset(na, NR_TX, txr->me, 0); -#endif /* DEV_NETMAP */ - bzero((void *)txr->tx_base, - (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc); - /* Reset indices */ - txr->next_avail_desc = 0; - txr->next_to_clean = 0; - - /* Free any existing tx buffers. 
*/ - txbuf = txr->tx_buffers; - for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) { - if (txbuf->m_head != NULL) { - bus_dmamap_sync(txr->txtag, txbuf->map, - BUS_DMASYNC_POSTWRITE); - bus_dmamap_unload(txr->txtag, txbuf->map); - m_freem(txbuf->m_head); - txbuf->m_head = NULL; - } -#ifdef DEV_NETMAP - if (slot) { - int si = netmap_idx_n2k(&na->tx_rings[txr->me], i); - /* no need to set the address */ - netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si)); - } -#endif /* DEV_NETMAP */ - /* clear the watch index */ - txbuf->eop = NULL; - } - - /* Set number of descriptors available */ - txr->tx_avail = adapter->num_tx_desc; - - bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - IGB_TX_UNLOCK(txr); -} - -/********************************************************************* - * - * Initialize all transmit rings. - * - **********************************************************************/ -static void -igb_setup_transmit_structures(struct adapter *adapter) -{ - struct tx_ring *txr = adapter->tx_rings; - - for (int i = 0; i < adapter->num_queues; i++, txr++) - igb_setup_transmit_ring(txr); - - return; -} - -/********************************************************************* - * - * Enable transmit unit. 
- * - **********************************************************************/ -static void -igb_initialize_transmit_units(struct adapter *adapter) -{ - struct tx_ring *txr = adapter->tx_rings; - struct e1000_hw *hw = &adapter->hw; - u32 tctl, txdctl; - - INIT_DEBUGOUT("igb_initialize_transmit_units: begin"); - tctl = txdctl = 0; - - /* Setup the Tx Descriptor Rings */ - for (int i = 0; i < adapter->num_queues; i++, txr++) { - u64 bus_addr = txr->txdma.dma_paddr; - - E1000_WRITE_REG(hw, E1000_TDLEN(i), - adapter->num_tx_desc * sizeof(struct e1000_tx_desc)); - E1000_WRITE_REG(hw, E1000_TDBAH(i), - (uint32_t)(bus_addr >> 32)); - E1000_WRITE_REG(hw, E1000_TDBAL(i), - (uint32_t)bus_addr); - - /* Setup the HW Tx Head and Tail descriptor pointers */ - E1000_WRITE_REG(hw, E1000_TDT(i), 0); - E1000_WRITE_REG(hw, E1000_TDH(i), 0); - - HW_DEBUGOUT2("Base = %x, Length = %x\n", - E1000_READ_REG(hw, E1000_TDBAL(i)), - E1000_READ_REG(hw, E1000_TDLEN(i))); - - txr->queue_status = IGB_QUEUE_IDLE; - - txdctl |= IGB_TX_PTHRESH; - txdctl |= IGB_TX_HTHRESH << 8; - txdctl |= IGB_TX_WTHRESH << 16; - txdctl |= E1000_TXDCTL_QUEUE_ENABLE; - E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl); - } - - if (adapter->vf_ifp) - return; - - e1000_config_collision_dist(hw); - - /* Program the Transmit Control Register */ - tctl = E1000_READ_REG(hw, E1000_TCTL); - tctl &= ~E1000_TCTL_CT; - tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN | - (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT)); - - /* This write will effectively turn on the transmit unit. */ - E1000_WRITE_REG(hw, E1000_TCTL, tctl); -} - -/********************************************************************* - * - * Free all transmit rings. 
- * - **********************************************************************/ -static void -igb_free_transmit_structures(struct adapter *adapter) -{ - struct tx_ring *txr = adapter->tx_rings; - - for (int i = 0; i < adapter->num_queues; i++, txr++) { - IGB_TX_LOCK(txr); - igb_free_transmit_buffers(txr); - igb_dma_free(adapter, &txr->txdma); - IGB_TX_UNLOCK(txr); - IGB_TX_LOCK_DESTROY(txr); - } - free(adapter->tx_rings, M_DEVBUF); -} - -/********************************************************************* - * - * Free transmit ring related data structures. - * - **********************************************************************/ -static void -igb_free_transmit_buffers(struct tx_ring *txr) -{ - struct adapter *adapter = txr->adapter; - struct igb_tx_buf *tx_buffer; - int i; - - INIT_DEBUGOUT("free_transmit_ring: begin"); - - if (txr->tx_buffers == NULL) - return; - - tx_buffer = txr->tx_buffers; - for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) { - if (tx_buffer->m_head != NULL) { - bus_dmamap_sync(txr->txtag, tx_buffer->map, - BUS_DMASYNC_POSTWRITE); - bus_dmamap_unload(txr->txtag, - tx_buffer->map); - m_freem(tx_buffer->m_head); - tx_buffer->m_head = NULL; - if (tx_buffer->map != NULL) { - bus_dmamap_destroy(txr->txtag, - tx_buffer->map); - tx_buffer->map = NULL; - } - } else if (tx_buffer->map != NULL) { - bus_dmamap_unload(txr->txtag, - tx_buffer->map); - bus_dmamap_destroy(txr->txtag, - tx_buffer->map); - tx_buffer->map = NULL; - } - } -#ifndef IGB_LEGACY_TX - if (txr->br != NULL) - buf_ring_free(txr->br, M_DEVBUF); -#endif - if (txr->tx_buffers != NULL) { - free(txr->tx_buffers, M_DEVBUF); - txr->tx_buffers = NULL; - } - if (txr->txtag != NULL) { - bus_dma_tag_destroy(txr->txtag); - txr->txtag = NULL; - } - return; -} - -/********************************************************************** - * - * Setup work for hardware segmentation offload (TSO) on - * adapters using advanced tx descriptors - * - 
**********************************************************************/ -static int -igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, - u32 *cmd_type_len, u32 *olinfo_status) -{ - struct adapter *adapter = txr->adapter; - struct e1000_adv_tx_context_desc *TXD; - u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0; - u32 mss_l4len_idx = 0, paylen; - u16 vtag = 0, eh_type; - int ctxd, ehdrlen, ip_hlen, tcp_hlen; - struct ether_vlan_header *eh; -#ifdef INET6 - struct ip6_hdr *ip6; -#endif -#ifdef INET - struct ip *ip; -#endif - struct tcphdr *th; - - - /* - * Determine where frame payload starts. - * Jump over vlan headers if already present - */ - eh = mtod(mp, struct ether_vlan_header *); - if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { - ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; - eh_type = eh->evl_proto; - } else { - ehdrlen = ETHER_HDR_LEN; - eh_type = eh->evl_encap_proto; - } - - switch (ntohs(eh_type)) { -#ifdef INET6 - case ETHERTYPE_IPV6: - ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); - /* XXX-BZ For now we do not pretend to support ext. hdrs. */ - if (ip6->ip6_nxt != IPPROTO_TCP) - return (ENXIO); - ip_hlen = sizeof(struct ip6_hdr); - ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); - th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen); - th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); - type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6; - break; -#endif -#ifdef INET - case ETHERTYPE_IP: - ip = (struct ip *)(mp->m_data + ehdrlen); - if (ip->ip_p != IPPROTO_TCP) - return (ENXIO); - ip->ip_sum = 0; - ip_hlen = ip->ip_hl << 2; - th = (struct tcphdr *)((caddr_t)ip + ip_hlen); - th->th_sum = in_pseudo(ip->ip_src.s_addr, - ip->ip_dst.s_addr, htons(IPPROTO_TCP)); - type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4; - /* Tell transmit desc to also do IPv4 checksum. 
*/ - *olinfo_status |= E1000_TXD_POPTS_IXSM << 8; - break; -#endif - default: - panic("%s: CSUM_TSO but no supported IP version (0x%04x)", - __func__, ntohs(eh_type)); - break; - } - - ctxd = txr->next_avail_desc; - TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd]; - - tcp_hlen = th->th_off << 2; - - /* This is used in the transmit desc in encap */ - paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen; - - /* VLAN MACLEN IPLEN */ - if (mp->m_flags & M_VLANTAG) { - vtag = htole16(mp->m_pkthdr.ether_vtag); - vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT); - } - - vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT; - vlan_macip_lens |= ip_hlen; - TXD->vlan_macip_lens = htole32(vlan_macip_lens); - - /* ADV DTYPE TUCMD */ - type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT; - type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP; - TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); - - /* MSS L4LEN IDX */ - mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT); - mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT); - /* 82575 needs the queue index added */ - if (adapter->hw.mac.type == e1000_82575) - mss_l4len_idx |= txr->me << 4; - TXD->mss_l4len_idx = htole32(mss_l4len_idx); - - TXD->seqnum_seed = htole32(0); - - if (++ctxd == txr->num_desc) - ctxd = 0; - - txr->tx_avail--; - txr->next_avail_desc = ctxd; - *cmd_type_len |= E1000_ADVTXD_DCMD_TSE; - *olinfo_status |= E1000_TXD_POPTS_TXSM << 8; - *olinfo_status |= paylen << E1000_ADVTXD_PAYLEN_SHIFT; - ++txr->tso_tx; - return (0); -} - -/********************************************************************* - * - * Advanced Context Descriptor setup for VLAN, CSUM or TSO - * - **********************************************************************/ - -static int -igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp, - u32 *cmd_type_len, u32 *olinfo_status) -{ - struct e1000_adv_tx_context_desc *TXD; - struct adapter *adapter = txr->adapter; - struct ether_vlan_header 
*eh; - struct ip *ip; - struct ip6_hdr *ip6; - u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0, mss_l4len_idx = 0; - int ehdrlen, ip_hlen = 0; - u16 etype; - u8 ipproto = 0; - int offload = TRUE; - int ctxd = txr->next_avail_desc; - u16 vtag = 0; - - /* First check if TSO is to be used */ - if (mp->m_pkthdr.csum_flags & CSUM_TSO) - return (igb_tso_setup(txr, mp, cmd_type_len, olinfo_status)); - - if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0) - offload = FALSE; - - /* Indicate the whole packet as payload when not doing TSO */ - *olinfo_status |= mp->m_pkthdr.len << E1000_ADVTXD_PAYLEN_SHIFT; - - /* Now ready a context descriptor */ - TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd]; - - /* - ** In advanced descriptors the vlan tag must - ** be placed into the context descriptor. Hence - ** we need to make one even if not doing offloads. - */ - if (mp->m_flags & M_VLANTAG) { - vtag = htole16(mp->m_pkthdr.ether_vtag); - vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT); - } else if (offload == FALSE) /* ... no offload to do */ - return (0); - - /* - * Determine where frame payload starts. - * Jump over vlan headers if already present, - * helpful for QinQ too. - */ - eh = mtod(mp, struct ether_vlan_header *); - if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { - etype = ntohs(eh->evl_proto); - ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; - } else { - etype = ntohs(eh->evl_encap_proto); - ehdrlen = ETHER_HDR_LEN; - } - - /* Set the ether header length */ - vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT; - - switch (etype) { - case ETHERTYPE_IP: - ip = (struct ip *)(mp->m_data + ehdrlen); - ip_hlen = ip->ip_hl << 2; - ipproto = ip->ip_p; - type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4; - break; - case ETHERTYPE_IPV6: - ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); - ip_hlen = sizeof(struct ip6_hdr); - /* XXX-BZ this will go badly in case of ext hdrs. 
*/ - ipproto = ip6->ip6_nxt; - type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6; - break; - default: - offload = FALSE; - break; - } - - vlan_macip_lens |= ip_hlen; - type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT; - - switch (ipproto) { - case IPPROTO_TCP: -#if __FreeBSD_version >= 1000000 - if (mp->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP)) -#else - if (mp->m_pkthdr.csum_flags & CSUM_TCP) -#endif - type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP; - break; - case IPPROTO_UDP: -#if __FreeBSD_version >= 1000000 - if (mp->m_pkthdr.csum_flags & (CSUM_IP_UDP | CSUM_IP6_UDP)) -#else - if (mp->m_pkthdr.csum_flags & CSUM_UDP) -#endif - type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP; - break; - -#if __FreeBSD_version >= 800000 - case IPPROTO_SCTP: -#if __FreeBSD_version >= 1000000 - if (mp->m_pkthdr.csum_flags & (CSUM_IP_SCTP | CSUM_IP6_SCTP)) -#else - if (mp->m_pkthdr.csum_flags & CSUM_SCTP) -#endif - type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP; - break; -#endif - default: - offload = FALSE; - break; - } - - if (offload) /* For the TX descriptor setup */ - *olinfo_status |= E1000_TXD_POPTS_TXSM << 8; - - /* 82575 needs the queue index added */ - if (adapter->hw.mac.type == e1000_82575) - mss_l4len_idx = txr->me << 4; - - /* Now copy bits into descriptor */ - TXD->vlan_macip_lens = htole32(vlan_macip_lens); - TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); - TXD->seqnum_seed = htole32(0); - TXD->mss_l4len_idx = htole32(mss_l4len_idx); - - /* We've consumed the first desc, adjust counters */ - if (++ctxd == txr->num_desc) - ctxd = 0; - txr->next_avail_desc = ctxd; - --txr->tx_avail; - - return (0); -} - -/********************************************************************** - * - * Examine each tx_buffer in the used queue. If the hardware is done - * processing the packet then free associated resources. The - * tx_buffer is put back on the free queue. - * - * TRUE return means there's work in the ring to clean, FALSE its empty. 
- **********************************************************************/ -static bool -igb_txeof(struct tx_ring *txr) -{ - struct adapter *adapter = txr->adapter; -#ifdef DEV_NETMAP - struct ifnet *ifp = adapter->ifp; -#endif /* DEV_NETMAP */ - u32 work, processed = 0; - int limit = adapter->tx_process_limit; - struct igb_tx_buf *buf; - union e1000_adv_tx_desc *txd; - - mtx_assert(&txr->tx_mtx, MA_OWNED); - -#ifdef DEV_NETMAP - if (netmap_tx_irq(ifp, txr->me)) - return (FALSE); -#endif /* DEV_NETMAP */ - - if (txr->tx_avail == txr->num_desc) { - txr->queue_status = IGB_QUEUE_IDLE; - return FALSE; - } - - /* Get work starting point */ - work = txr->next_to_clean; - buf = &txr->tx_buffers[work]; - txd = &txr->tx_base[work]; - work -= txr->num_desc; /* The distance to ring end */ - bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, - BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); - do { - union e1000_adv_tx_desc *eop = buf->eop; - if (eop == NULL) /* No work */ - break; - - if ((eop->wb.status & E1000_TXD_STAT_DD) == 0) - break; /* I/O not complete */ - - if (buf->m_head) { - txr->bytes += - buf->m_head->m_pkthdr.len; - bus_dmamap_sync(txr->txtag, - buf->map, - BUS_DMASYNC_POSTWRITE); - bus_dmamap_unload(txr->txtag, - buf->map); - m_freem(buf->m_head); - buf->m_head = NULL; - } - buf->eop = NULL; - ++txr->tx_avail; - - /* We clean the range if multi segment */ - while (txd != eop) { - ++txd; - ++buf; - ++work; - /* wrap the ring? 
*/ - if (__predict_false(!work)) { - work -= txr->num_desc; - buf = txr->tx_buffers; - txd = txr->tx_base; - } - if (buf->m_head) { - txr->bytes += - buf->m_head->m_pkthdr.len; - bus_dmamap_sync(txr->txtag, - buf->map, - BUS_DMASYNC_POSTWRITE); - bus_dmamap_unload(txr->txtag, - buf->map); - m_freem(buf->m_head); - buf->m_head = NULL; - } - ++txr->tx_avail; - buf->eop = NULL; - - } - ++txr->packets; - ++processed; - txr->watchdog_time = ticks; - - /* Try the next packet */ - ++txd; - ++buf; - ++work; - /* reset with a wrap */ - if (__predict_false(!work)) { - work -= txr->num_desc; - buf = txr->tx_buffers; - txd = txr->tx_base; - } - prefetch(txd); - } while (__predict_true(--limit)); - - bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - - work += txr->num_desc; - txr->next_to_clean = work; - - /* - ** Watchdog calculation, we know there's - ** work outstanding or the first return - ** would have been taken, so none processed - ** for too long indicates a hang. - */ - if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG)) - txr->queue_status |= IGB_QUEUE_HUNG; - - if (txr->tx_avail >= IGB_QUEUE_THRESHOLD) - txr->queue_status &= ~IGB_QUEUE_DEPLETED; - - if (txr->tx_avail == txr->num_desc) { - txr->queue_status = IGB_QUEUE_IDLE; - return (FALSE); - } - - return (TRUE); -} - -/********************************************************************* - * - * Refresh mbuf buffers for RX descriptor rings - * - now keeps its own state so discards due to resource - * exhaustion are unnecessary, if an mbuf cannot be obtained - * it just returns, keeping its placeholder, thus it can simply - * be recalled to try again. 
- * - **********************************************************************/ -static void -igb_refresh_mbufs(struct rx_ring *rxr, int limit) -{ - struct adapter *adapter = rxr->adapter; - bus_dma_segment_t hseg[1]; - bus_dma_segment_t pseg[1]; - struct igb_rx_buf *rxbuf; - struct mbuf *mh, *mp; - int i, j, nsegs, error; - bool refreshed = FALSE; - - i = j = rxr->next_to_refresh; - /* - ** Get one descriptor beyond - ** our work mark to control - ** the loop. - */ - if (++j == adapter->num_rx_desc) - j = 0; - - while (j != limit) { - rxbuf = &rxr->rx_buffers[i]; - /* No hdr mbuf used with header split off */ - if (rxr->hdr_split == FALSE) - goto no_split; - if (rxbuf->m_head == NULL) { - mh = m_gethdr(M_NOWAIT, MT_DATA); - if (mh == NULL) - goto update; - } else - mh = rxbuf->m_head; - - mh->m_pkthdr.len = mh->m_len = MHLEN; - mh->m_len = MHLEN; - mh->m_flags |= M_PKTHDR; - /* Get the memory mapping */ - error = bus_dmamap_load_mbuf_sg(rxr->htag, - rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT); - if (error != 0) { - printf("Refresh mbufs: hdr dmamap load" - " failure - %d\n", error); - m_free(mh); - rxbuf->m_head = NULL; - goto update; - } - rxbuf->m_head = mh; - bus_dmamap_sync(rxr->htag, rxbuf->hmap, - BUS_DMASYNC_PREREAD); - rxr->rx_base[i].read.hdr_addr = - htole64(hseg[0].ds_addr); -no_split: - if (rxbuf->m_pack == NULL) { - mp = m_getjcl(M_NOWAIT, MT_DATA, - M_PKTHDR, adapter->rx_mbuf_sz); - if (mp == NULL) - goto update; - } else - mp = rxbuf->m_pack; - - mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz; - /* Get the memory mapping */ - error = bus_dmamap_load_mbuf_sg(rxr->ptag, - rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT); - if (error != 0) { - printf("Refresh mbufs: payload dmamap load" - " failure - %d\n", error); - m_free(mp); - rxbuf->m_pack = NULL; - goto update; - } - rxbuf->m_pack = mp; - bus_dmamap_sync(rxr->ptag, rxbuf->pmap, - BUS_DMASYNC_PREREAD); - rxr->rx_base[i].read.pkt_addr = - htole64(pseg[0].ds_addr); - refreshed = TRUE; /* I feel 
wefreshed :) */ - - i = j; /* our next is precalculated */ - rxr->next_to_refresh = i; - if (++j == adapter->num_rx_desc) - j = 0; - } -update: - if (refreshed) /* update tail */ - E1000_WRITE_REG(&adapter->hw, - E1000_RDT(rxr->me), rxr->next_to_refresh); - return; -} - - -/********************************************************************* - * - * Allocate memory for rx_buffer structures. Since we use one - * rx_buffer per received packet, the maximum number of rx_buffer's - * that we'll need is equal to the number of receive descriptors - * that we've allocated. - * - **********************************************************************/ -static int -igb_allocate_receive_buffers(struct rx_ring *rxr) -{ - struct adapter *adapter = rxr->adapter; - device_t dev = adapter->dev; - struct igb_rx_buf *rxbuf; - int i, bsize, error; - - bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc; - if (!(rxr->rx_buffers = - (struct igb_rx_buf *) malloc(bsize, - M_DEVBUF, M_NOWAIT | M_ZERO))) { - device_printf(dev, "Unable to allocate rx_buffer memory\n"); - error = ENOMEM; - goto fail; - } - - if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), - 1, 0, /* alignment, bounds */ - BUS_SPACE_MAXADDR, /* lowaddr */ - BUS_SPACE_MAXADDR, /* highaddr */ - NULL, NULL, /* filter, filterarg */ - MSIZE, /* maxsize */ - 1, /* nsegments */ - MSIZE, /* maxsegsize */ - 0, /* flags */ - NULL, /* lockfunc */ - NULL, /* lockfuncarg */ - &rxr->htag))) { - device_printf(dev, "Unable to create RX DMA tag\n"); - goto fail; - } - - if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), - 1, 0, /* alignment, bounds */ - BUS_SPACE_MAXADDR, /* lowaddr */ - BUS_SPACE_MAXADDR, /* highaddr */ - NULL, NULL, /* filter, filterarg */ - MJUM9BYTES, /* maxsize */ - 1, /* nsegments */ - MJUM9BYTES, /* maxsegsize */ - 0, /* flags */ - NULL, /* lockfunc */ - NULL, /* lockfuncarg */ - &rxr->ptag))) { - device_printf(dev, "Unable to create RX payload DMA tag\n"); - goto fail; - } - - for (i = 0; i < 
adapter->num_rx_desc; i++) { - rxbuf = &rxr->rx_buffers[i]; - error = bus_dmamap_create(rxr->htag, 0, &rxbuf->hmap); - if (error) { - device_printf(dev, - "Unable to create RX head DMA maps\n"); - goto fail; - } - error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap); - if (error) { - device_printf(dev, - "Unable to create RX packet DMA maps\n"); - goto fail; - } - } - - return (0); - -fail: - /* Frees all, but can handle partial completion */ - igb_free_receive_structures(adapter); - return (error); -} - - -static void -igb_free_receive_ring(struct rx_ring *rxr) -{ - struct adapter *adapter = rxr->adapter; - struct igb_rx_buf *rxbuf; - - - for (int i = 0; i < adapter->num_rx_desc; i++) { - rxbuf = &rxr->rx_buffers[i]; - if (rxbuf->m_head != NULL) { - bus_dmamap_sync(rxr->htag, rxbuf->hmap, - BUS_DMASYNC_POSTREAD); - bus_dmamap_unload(rxr->htag, rxbuf->hmap); - rxbuf->m_head->m_flags |= M_PKTHDR; - m_freem(rxbuf->m_head); - } - if (rxbuf->m_pack != NULL) { - bus_dmamap_sync(rxr->ptag, rxbuf->pmap, - BUS_DMASYNC_POSTREAD); - bus_dmamap_unload(rxr->ptag, rxbuf->pmap); - rxbuf->m_pack->m_flags |= M_PKTHDR; - m_freem(rxbuf->m_pack); - } - rxbuf->m_head = NULL; - rxbuf->m_pack = NULL; - } -} - - -/********************************************************************* - * - * Initialize a receive ring and its buffers. 
- * - **********************************************************************/ -static int -igb_setup_receive_ring(struct rx_ring *rxr) -{ - struct adapter *adapter; - struct ifnet *ifp; - device_t dev; - struct igb_rx_buf *rxbuf; - bus_dma_segment_t pseg[1], hseg[1]; - struct lro_ctrl *lro = &rxr->lro; - int rsize, nsegs, error = 0; -#ifdef DEV_NETMAP - struct netmap_adapter *na = NA(rxr->adapter->ifp); - struct netmap_slot *slot; -#endif /* DEV_NETMAP */ - - adapter = rxr->adapter; - dev = adapter->dev; - ifp = adapter->ifp; - - /* Clear the ring contents */ - IGB_RX_LOCK(rxr); -#ifdef DEV_NETMAP - slot = netmap_reset(na, NR_RX, rxr->me, 0); -#endif /* DEV_NETMAP */ - rsize = roundup2(adapter->num_rx_desc * - sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN); - bzero((void *)rxr->rx_base, rsize); - - /* - ** Free current RX buffer structures and their mbufs - */ - igb_free_receive_ring(rxr); - - /* Configure for header split? */ - if (igb_header_split) - rxr->hdr_split = TRUE; - - /* Now replenish the ring mbufs */ - for (int j = 0; j < adapter->num_rx_desc; ++j) { - struct mbuf *mh, *mp; - - rxbuf = &rxr->rx_buffers[j]; -#ifdef DEV_NETMAP - if (slot) { - /* slot sj is mapped to the j-th NIC-ring entry */ - int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j); - uint64_t paddr; - void *addr; - - addr = PNMB(na, slot + sj, &paddr); - netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr); - /* Update descriptor */ - rxr->rx_base[j].read.pkt_addr = htole64(paddr); - continue; - } -#endif /* DEV_NETMAP */ - if (rxr->hdr_split == FALSE) - goto skip_head; - - /* First the header */ - rxbuf->m_head = m_gethdr(M_NOWAIT, MT_DATA); - if (rxbuf->m_head == NULL) { - error = ENOBUFS; - goto fail; - } - m_adj(rxbuf->m_head, ETHER_ALIGN); - mh = rxbuf->m_head; - mh->m_len = mh->m_pkthdr.len = MHLEN; - mh->m_flags |= M_PKTHDR; - /* Get the memory mapping */ - error = bus_dmamap_load_mbuf_sg(rxr->htag, - rxbuf->hmap, rxbuf->m_head, hseg, - &nsegs, BUS_DMA_NOWAIT); - if (error != 0) /* 
Nothing elegant to do here */ - goto fail; - bus_dmamap_sync(rxr->htag, - rxbuf->hmap, BUS_DMASYNC_PREREAD); - /* Update descriptor */ - rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr); - -skip_head: - /* Now the payload cluster */ - rxbuf->m_pack = m_getjcl(M_NOWAIT, MT_DATA, - M_PKTHDR, adapter->rx_mbuf_sz); - if (rxbuf->m_pack == NULL) { - error = ENOBUFS; - goto fail; - } - mp = rxbuf->m_pack; - mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz; - /* Get the memory mapping */ - error = bus_dmamap_load_mbuf_sg(rxr->ptag, - rxbuf->pmap, mp, pseg, - &nsegs, BUS_DMA_NOWAIT); - if (error != 0) - goto fail; - bus_dmamap_sync(rxr->ptag, - rxbuf->pmap, BUS_DMASYNC_PREREAD); - /* Update descriptor */ - rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr); - } - - /* Setup our descriptor indices */ - rxr->next_to_check = 0; - rxr->next_to_refresh = adapter->num_rx_desc - 1; - rxr->lro_enabled = FALSE; - rxr->rx_split_packets = 0; - rxr->rx_bytes = 0; - - rxr->fmp = NULL; - rxr->lmp = NULL; - - bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - - /* - ** Now set up the LRO interface, we - ** also only do head split when LRO - ** is enabled, since so often they - ** are undesirable in similar setups. - */ - if (ifp->if_capenable & IFCAP_LRO) { - error = tcp_lro_init(lro); - if (error) { - device_printf(dev, "LRO Initialization failed!\n"); - goto fail; - } - INIT_DEBUGOUT("RX LRO Initialized\n"); - rxr->lro_enabled = TRUE; - lro->ifp = adapter->ifp; - } - - IGB_RX_UNLOCK(rxr); - return (0); - -fail: - igb_free_receive_ring(rxr); - IGB_RX_UNLOCK(rxr); - return (error); -} - - -/********************************************************************* - * - * Initialize all receive rings. 
- * - **********************************************************************/ -static int -igb_setup_receive_structures(struct adapter *adapter) -{ - struct rx_ring *rxr = adapter->rx_rings; - int i; - - for (i = 0; i < adapter->num_queues; i++, rxr++) - if (igb_setup_receive_ring(rxr)) - goto fail; - - return (0); -fail: - /* - * Free RX buffers allocated so far, we will only handle - * the rings that completed, the failing case will have - * cleaned up for itself. 'i' is the endpoint. - */ - for (int j = 0; j < i; ++j) { - rxr = &adapter->rx_rings[j]; - IGB_RX_LOCK(rxr); - igb_free_receive_ring(rxr); - IGB_RX_UNLOCK(rxr); - } - - return (ENOBUFS); -} - -/* - * Initialise the RSS mapping for NICs that support multiple transmit/ - * receive rings. - */ -static void -igb_initialise_rss_mapping(struct adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - int i; - int queue_id; - u32 reta; - u32 rss_key[10], mrqc, shift = 0; - - /* XXX? */ - if (adapter->hw.mac.type == e1000_82575) - shift = 6; - - /* - * The redirection table controls which destination - * queue each bucket redirects traffic to. - * Each DWORD represents four queues, with the LSB - * being the first queue in the DWORD. - * - * This just allocates buckets to queues using round-robin - * allocation. - * - * NOTE: It Just Happens to line up with the default - * RSS allocation method. - */ - - /* Warning FM follows */ - reta = 0; - for (i = 0; i < 128; i++) { -#ifdef RSS - queue_id = rss_get_indirection_to_bucket(i); - /* - * If we have more queues than buckets, we'll - * end up mapping buckets to a subset of the - * queues. - * - * If we have more buckets than queues, we'll - * end up instead assigning multiple buckets - * to queues. - * - * Both are suboptimal, but we need to handle - * the case so we don't go out of bounds - * indexing arrays and such. 
- */ - queue_id = queue_id % adapter->num_queues; -#else - queue_id = (i % adapter->num_queues); -#endif - /* Adjust if required */ - queue_id = queue_id << shift; - - /* - * The low 8 bits are for hash value (n+0); - * The next 8 bits are for hash value (n+1), etc. - */ - reta = reta >> 8; - reta = reta | ( ((uint32_t) queue_id) << 24); - if ((i & 3) == 3) { - E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta); - reta = 0; - } - } - - /* Now fill in hash table */ - - /* - * MRQC: Multiple Receive Queues Command - * Set queuing to RSS control, number depends on the device. - */ - mrqc = E1000_MRQC_ENABLE_RSS_8Q; - -#ifdef RSS - /* XXX ew typecasting */ - rss_getkey((uint8_t *) &rss_key); -#else - arc4rand(&rss_key, sizeof(rss_key), 0); -#endif - for (i = 0; i < 10; i++) - E1000_WRITE_REG_ARRAY(hw, - E1000_RSSRK(0), i, rss_key[i]); - - /* - * Configure the RSS fields to hash upon. - */ - mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 | - E1000_MRQC_RSS_FIELD_IPV4_TCP); - mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 | - E1000_MRQC_RSS_FIELD_IPV6_TCP); - mrqc |=( E1000_MRQC_RSS_FIELD_IPV4_UDP | - E1000_MRQC_RSS_FIELD_IPV6_UDP); - mrqc |=( E1000_MRQC_RSS_FIELD_IPV6_UDP_EX | - E1000_MRQC_RSS_FIELD_IPV6_TCP_EX); - - E1000_WRITE_REG(hw, E1000_MRQC, mrqc); -} - -/********************************************************************* - * - * Enable receive unit. 
- * - **********************************************************************/ -static void -igb_initialize_receive_units(struct adapter *adapter) -{ - struct rx_ring *rxr = adapter->rx_rings; - struct ifnet *ifp = adapter->ifp; - struct e1000_hw *hw = &adapter->hw; - u32 rctl, rxcsum, psize, srrctl = 0; - - INIT_DEBUGOUT("igb_initialize_receive_unit: begin"); - - /* - * Make sure receives are disabled while setting - * up the descriptor ring - */ - rctl = E1000_READ_REG(hw, E1000_RCTL); - E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); - - /* - ** Set up for header split - */ - if (igb_header_split) { - /* Use a standard mbuf for the header */ - srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT; - srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS; - } else - srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF; - - /* - ** Set up for jumbo frames - */ - if (ifp->if_mtu > ETHERMTU) { - rctl |= E1000_RCTL_LPE; - if (adapter->rx_mbuf_sz == MJUMPAGESIZE) { - srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT; - rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX; - } else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) { - srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT; - rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX; - } - /* Set maximum packet len */ - psize = adapter->max_frame_size; - /* are we on a vlan? */ - if (adapter->ifp->if_vlantrunk != NULL) - psize += VLAN_TAG_SIZE; - E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize); - } else { - rctl &= ~E1000_RCTL_LPE; - srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT; - rctl |= E1000_RCTL_SZ_2048; - } - - /* - * If TX flow control is disabled and there's >1 queue defined, - * enable DROP. - * - * This drops frames rather than hanging the RX MAC for all queues. 
- */ - if ((adapter->num_queues > 1) && - (adapter->fc == e1000_fc_none || - adapter->fc == e1000_fc_rx_pause)) { - srrctl |= E1000_SRRCTL_DROP_EN; - } - - /* Setup the Base and Length of the Rx Descriptor Rings */ - for (int i = 0; i < adapter->num_queues; i++, rxr++) { - u64 bus_addr = rxr->rxdma.dma_paddr; - u32 rxdctl; - - E1000_WRITE_REG(hw, E1000_RDLEN(i), - adapter->num_rx_desc * sizeof(struct e1000_rx_desc)); - E1000_WRITE_REG(hw, E1000_RDBAH(i), - (uint32_t)(bus_addr >> 32)); - E1000_WRITE_REG(hw, E1000_RDBAL(i), - (uint32_t)bus_addr); - E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl); - /* Enable this Queue */ - rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i)); - rxdctl |= E1000_RXDCTL_QUEUE_ENABLE; - rxdctl &= 0xFFF00000; - rxdctl |= IGB_RX_PTHRESH; - rxdctl |= IGB_RX_HTHRESH << 8; - rxdctl |= IGB_RX_WTHRESH << 16; - E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl); - } - - /* - ** Setup for RX MultiQueue - */ - rxcsum = E1000_READ_REG(hw, E1000_RXCSUM); - if (adapter->num_queues >1) { - - /* rss setup */ - igb_initialise_rss_mapping(adapter); - - /* - ** NOTE: Receive Full-Packet Checksum Offload - ** is mutually exclusive with Multiqueue. However - ** this is not the same as TCP/IP checksums which - ** still work. 
- */ - rxcsum |= E1000_RXCSUM_PCSD; -#if __FreeBSD_version >= 800000 - /* For SCTP Offload */ - if ((hw->mac.type != e1000_82575) && - (ifp->if_capenable & IFCAP_RXCSUM)) - rxcsum |= E1000_RXCSUM_CRCOFL; -#endif - } else { - /* Non RSS setup */ - if (ifp->if_capenable & IFCAP_RXCSUM) { - rxcsum |= E1000_RXCSUM_IPPCSE; -#if __FreeBSD_version >= 800000 - if (adapter->hw.mac.type != e1000_82575) - rxcsum |= E1000_RXCSUM_CRCOFL; -#endif - } else - rxcsum &= ~E1000_RXCSUM_TUOFL; - } - E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum); - - /* Setup the Receive Control Register */ - rctl &= ~(3 << E1000_RCTL_MO_SHIFT); - rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO | - E1000_RCTL_RDMTS_HALF | - (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT); - /* Strip CRC bytes. */ - rctl |= E1000_RCTL_SECRC; - /* Make sure VLAN Filters are off */ - rctl &= ~E1000_RCTL_VFE; - /* Don't store bad packets */ - rctl &= ~E1000_RCTL_SBP; - - /* Enable Receives */ - E1000_WRITE_REG(hw, E1000_RCTL, rctl); - - /* - * Setup the HW Rx Head and Tail Descriptor Pointers - * - needs to be after enable - */ - for (int i = 0; i < adapter->num_queues; i++) { - rxr = &adapter->rx_rings[i]; - E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check); -#ifdef DEV_NETMAP - /* - * an init() while a netmap client is active must - * preserve the rx buffers passed to userspace. - * In this driver it means we adjust RDT to - * something different from next_to_refresh - * (which is not used in netmap mode). 
- */ - if (ifp->if_capenable & IFCAP_NETMAP) { - struct netmap_adapter *na = NA(adapter->ifp); - struct netmap_kring *kring = &na->rx_rings[i]; - int t = rxr->next_to_refresh - nm_kr_rxspace(kring); - - if (t >= adapter->num_rx_desc) - t -= adapter->num_rx_desc; - else if (t < 0) - t += adapter->num_rx_desc; - E1000_WRITE_REG(hw, E1000_RDT(i), t); - } else -#endif /* DEV_NETMAP */ - E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh); - } - return; -} - -/********************************************************************* - * - * Free receive rings. - * - **********************************************************************/ -static void -igb_free_receive_structures(struct adapter *adapter) -{ - struct rx_ring *rxr = adapter->rx_rings; - - for (int i = 0; i < adapter->num_queues; i++, rxr++) { - struct lro_ctrl *lro = &rxr->lro; - igb_free_receive_buffers(rxr); - tcp_lro_free(lro); - igb_dma_free(adapter, &rxr->rxdma); - } - - free(adapter->rx_rings, M_DEVBUF); -} - -/********************************************************************* - * - * Free receive ring data structures. 
- * - **********************************************************************/ -static void -igb_free_receive_buffers(struct rx_ring *rxr) -{ - struct adapter *adapter = rxr->adapter; - struct igb_rx_buf *rxbuf; - int i; - - INIT_DEBUGOUT("free_receive_structures: begin"); - - /* Cleanup any existing buffers */ - if (rxr->rx_buffers != NULL) { - for (i = 0; i < adapter->num_rx_desc; i++) { - rxbuf = &rxr->rx_buffers[i]; - if (rxbuf->m_head != NULL) { - bus_dmamap_sync(rxr->htag, rxbuf->hmap, - BUS_DMASYNC_POSTREAD); - bus_dmamap_unload(rxr->htag, rxbuf->hmap); - rxbuf->m_head->m_flags |= M_PKTHDR; - m_freem(rxbuf->m_head); - } - if (rxbuf->m_pack != NULL) { - bus_dmamap_sync(rxr->ptag, rxbuf->pmap, - BUS_DMASYNC_POSTREAD); - bus_dmamap_unload(rxr->ptag, rxbuf->pmap); - rxbuf->m_pack->m_flags |= M_PKTHDR; - m_freem(rxbuf->m_pack); - } - rxbuf->m_head = NULL; - rxbuf->m_pack = NULL; - if (rxbuf->hmap != NULL) { - bus_dmamap_destroy(rxr->htag, rxbuf->hmap); - rxbuf->hmap = NULL; - } - if (rxbuf->pmap != NULL) { - bus_dmamap_destroy(rxr->ptag, rxbuf->pmap); - rxbuf->pmap = NULL; - } - } - if (rxr->rx_buffers != NULL) { - free(rxr->rx_buffers, M_DEVBUF); - rxr->rx_buffers = NULL; - } - } - - if (rxr->htag != NULL) { - bus_dma_tag_destroy(rxr->htag); - rxr->htag = NULL; - } - if (rxr->ptag != NULL) { - bus_dma_tag_destroy(rxr->ptag); - rxr->ptag = NULL; - } -} - -static __inline void -igb_rx_discard(struct rx_ring *rxr, int i) -{ - struct igb_rx_buf *rbuf; - - rbuf = &rxr->rx_buffers[i]; - - /* Partially received? Free the chain */ - if (rxr->fmp != NULL) { - rxr->fmp->m_flags |= M_PKTHDR; - m_freem(rxr->fmp); - rxr->fmp = NULL; - rxr->lmp = NULL; - } - - /* - ** With advanced descriptors the writeback - ** clobbers the buffer addrs, so its easier - ** to just free the existing mbufs and take - ** the normal refresh path to get new buffers - ** and mapping. 
- */ - if (rbuf->m_head) { - m_free(rbuf->m_head); - rbuf->m_head = NULL; - bus_dmamap_unload(rxr->htag, rbuf->hmap); - } - - if (rbuf->m_pack) { - m_free(rbuf->m_pack); - rbuf->m_pack = NULL; - bus_dmamap_unload(rxr->ptag, rbuf->pmap); - } - - return; -} - -static __inline void -igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype) -{ - - /* - * ATM LRO is only for IPv4/TCP packets and TCP checksum of the packet - * should be computed by hardware. Also it should not have VLAN tag in - * ethernet header. - */ - if (rxr->lro_enabled && - (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 && - (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 && - (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) == - (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) && - (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) == - (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) { - /* - * Send to the stack if: - ** - LRO not enabled, or - ** - no LRO resources, or - ** - lro enqueue fails - */ - if (rxr->lro.lro_cnt != 0) - if (tcp_lro_rx(&rxr->lro, m, 0) == 0) - return; - } - IGB_RX_UNLOCK(rxr); - (*ifp->if_input)(ifp, m); - IGB_RX_LOCK(rxr); -} - -/********************************************************************* - * - * This routine executes in interrupt context. It replenishes - * the mbufs in the descriptor and sends data which has been - * dma'ed into host memory to upper layer. - * - * We loop at most count times if count is > 0, or until done if - * count < 0. - * - * Return TRUE if more to clean, FALSE otherwise - *********************************************************************/ -static bool -igb_rxeof(struct igb_queue *que, int count, int *done) -{ - struct adapter *adapter = que->adapter; - struct rx_ring *rxr = que->rxr; - struct ifnet *ifp = adapter->ifp; - struct lro_ctrl *lro = &rxr->lro; - int i, processed = 0, rxdone = 0; - u32 ptype, staterr = 0; - union e1000_adv_rx_desc *cur; - - IGB_RX_LOCK(rxr); - /* Sync the ring. 
*/ - bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, - BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); - -#ifdef DEV_NETMAP - if (netmap_rx_irq(ifp, rxr->me, &processed)) { - IGB_RX_UNLOCK(rxr); - return (FALSE); - } -#endif /* DEV_NETMAP */ - - /* Main clean loop */ - for (i = rxr->next_to_check; count != 0;) { - struct mbuf *sendmp, *mh, *mp; - struct igb_rx_buf *rxbuf; - u16 hlen, plen, hdr, vtag, pkt_info; - bool eop = FALSE; - - cur = &rxr->rx_base[i]; - staterr = le32toh(cur->wb.upper.status_error); - if ((staterr & E1000_RXD_STAT_DD) == 0) - break; - if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) - break; - count--; - sendmp = mh = mp = NULL; - cur->wb.upper.status_error = 0; - rxbuf = &rxr->rx_buffers[i]; - plen = le16toh(cur->wb.upper.length); - ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK; - if (((adapter->hw.mac.type == e1000_i350) || - (adapter->hw.mac.type == e1000_i354)) && - (staterr & E1000_RXDEXT_STATERR_LB)) - vtag = be16toh(cur->wb.upper.vlan); - else - vtag = le16toh(cur->wb.upper.vlan); - hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info); - pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info); - eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP); - - /* - * Free the frame (all segments) if we're at EOP and - * it's an error. - * - * The datasheet states that EOP + status is only valid for - * the final segment in a multi-segment frame. - */ - if (eop && ((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0)) { - adapter->dropped_pkts++; - ++rxr->rx_discarded; - igb_rx_discard(rxr, i); - goto next_desc; - } - - /* - ** The way the hardware is configured to - ** split, it will ONLY use the header buffer - ** when header split is enabled, otherwise we - ** get normal behavior, ie, both header and - ** payload are DMA'd into the payload buffer. - ** - ** The fmp test is to catch the case where a - ** packet spans multiple descriptors, in that - ** case only the first header is valid. 
- */ - if (rxr->hdr_split && rxr->fmp == NULL) { - bus_dmamap_unload(rxr->htag, rxbuf->hmap); - hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >> - E1000_RXDADV_HDRBUFLEN_SHIFT; - if (hlen > IGB_HDR_BUF) - hlen = IGB_HDR_BUF; - mh = rxr->rx_buffers[i].m_head; - mh->m_len = hlen; - /* clear buf pointer for refresh */ - rxbuf->m_head = NULL; - /* - ** Get the payload length, this - ** could be zero if its a small - ** packet. - */ - if (plen > 0) { - mp = rxr->rx_buffers[i].m_pack; - mp->m_len = plen; - mh->m_next = mp; - /* clear buf pointer */ - rxbuf->m_pack = NULL; - rxr->rx_split_packets++; - } - } else { - /* - ** Either no header split, or a - ** secondary piece of a fragmented - ** split packet. - */ - mh = rxr->rx_buffers[i].m_pack; - mh->m_len = plen; - /* clear buf info for refresh */ - rxbuf->m_pack = NULL; - } - bus_dmamap_unload(rxr->ptag, rxbuf->pmap); - - ++processed; /* So we know when to refresh */ - - /* Initial frame - setup */ - if (rxr->fmp == NULL) { - mh->m_pkthdr.len = mh->m_len; - /* Save the head of the chain */ - rxr->fmp = mh; - rxr->lmp = mh; - if (mp != NULL) { - /* Add payload if split */ - mh->m_pkthdr.len += mp->m_len; - rxr->lmp = mh->m_next; - } - } else { - /* Chain mbuf's together */ - rxr->lmp->m_next = mh; - rxr->lmp = rxr->lmp->m_next; - rxr->fmp->m_pkthdr.len += mh->m_len; - } - - if (eop) { - rxr->fmp->m_pkthdr.rcvif = ifp; - rxr->rx_packets++; - /* capture data for AIM */ - rxr->packets++; - rxr->bytes += rxr->fmp->m_pkthdr.len; - rxr->rx_bytes += rxr->fmp->m_pkthdr.len; - - if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) - igb_rx_checksum(staterr, rxr->fmp, ptype); - - if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 && - (staterr & E1000_RXD_STAT_VP) != 0) { - rxr->fmp->m_pkthdr.ether_vtag = vtag; - rxr->fmp->m_flags |= M_VLANTAG; - } - - /* - * In case of multiqueue, we have RXCSUM.PCSD bit set - * and never cleared. This means we have RSS hash - * available to be used. 
- */ - if (adapter->num_queues > 1) { - rxr->fmp->m_pkthdr.flowid = - le32toh(cur->wb.lower.hi_dword.rss); - switch (pkt_info & E1000_RXDADV_RSSTYPE_MASK) { - case E1000_RXDADV_RSSTYPE_IPV4_TCP: - M_HASHTYPE_SET(rxr->fmp, - M_HASHTYPE_RSS_TCP_IPV4); - break; - case E1000_RXDADV_RSSTYPE_IPV4: - M_HASHTYPE_SET(rxr->fmp, - M_HASHTYPE_RSS_IPV4); - break; - case E1000_RXDADV_RSSTYPE_IPV6_TCP: - M_HASHTYPE_SET(rxr->fmp, - M_HASHTYPE_RSS_TCP_IPV6); - break; - case E1000_RXDADV_RSSTYPE_IPV6_EX: - M_HASHTYPE_SET(rxr->fmp, - M_HASHTYPE_RSS_IPV6_EX); - break; - case E1000_RXDADV_RSSTYPE_IPV6: - M_HASHTYPE_SET(rxr->fmp, - M_HASHTYPE_RSS_IPV6); - break; - case E1000_RXDADV_RSSTYPE_IPV6_TCP_EX: - M_HASHTYPE_SET(rxr->fmp, - M_HASHTYPE_RSS_TCP_IPV6_EX); - break; - default: - /* XXX fallthrough */ - M_HASHTYPE_SET(rxr->fmp, - M_HASHTYPE_OPAQUE_HASH); - } - } else { -#ifndef IGB_LEGACY_TX - rxr->fmp->m_pkthdr.flowid = que->msix; - M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_OPAQUE); -#endif - } - sendmp = rxr->fmp; - /* Make sure to set M_PKTHDR. */ - sendmp->m_flags |= M_PKTHDR; - rxr->fmp = NULL; - rxr->lmp = NULL; - } - -next_desc: - bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - - /* Advance our pointers to the next descriptor. */ - if (++i == adapter->num_rx_desc) - i = 0; - /* - ** Send to the stack or LRO - */ - if (sendmp != NULL) { - rxr->next_to_check = i; - igb_rx_input(rxr, ifp, sendmp, ptype); - i = rxr->next_to_check; - rxdone++; - } - - /* Every 8 descriptors we go to refresh mbufs */ - if (processed == 8) { - igb_refresh_mbufs(rxr, i); - processed = 0; - } - } - - /* Catch any remainders */ - if (igb_rx_unrefreshed(rxr)) - igb_refresh_mbufs(rxr, i); - - rxr->next_to_check = i; - - /* - * Flush any outstanding LRO work - */ - tcp_lro_flush_all(lro); - - if (done != NULL) - *done += rxdone; - - IGB_RX_UNLOCK(rxr); - return ((staterr & E1000_RXD_STAT_DD) ? 
TRUE : FALSE); -} - -/********************************************************************* - * - * Verify that the hardware indicated that the checksum is valid. - * Inform the stack about the status of checksum so that stack - * doesn't spend time verifying the checksum. - * - *********************************************************************/ -static void -igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype) -{ - u16 status = (u16)staterr; - u8 errors = (u8) (staterr >> 24); - int sctp; - - /* Ignore Checksum bit is set */ - if (status & E1000_RXD_STAT_IXSM) { - mp->m_pkthdr.csum_flags = 0; - return; - } - - if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 && - (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0) - sctp = 1; - else - sctp = 0; - if (status & E1000_RXD_STAT_IPCS) { - /* Did it pass? */ - if (!(errors & E1000_RXD_ERR_IPE)) { - /* IP Checksum Good */ - mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED; - mp->m_pkthdr.csum_flags |= CSUM_IP_VALID; - } else - mp->m_pkthdr.csum_flags = 0; - } - - if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) { - u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); -#if __FreeBSD_version >= 800000 - if (sctp) /* reassign */ - type = CSUM_SCTP_VALID; -#endif - /* Did it pass? 
*/ - if (!(errors & E1000_RXD_ERR_TCPE)) { - mp->m_pkthdr.csum_flags |= type; - if (sctp == 0) - mp->m_pkthdr.csum_data = htons(0xffff); - } - } - return; -} - -/* - * This routine is run via an vlan - * config EVENT - */ -static void -igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag) -{ - struct adapter *adapter = ifp->if_softc; - u32 index, bit; - - if (ifp->if_softc != arg) /* Not our event */ - return; - - if ((vtag == 0) || (vtag > 4095)) /* Invalid */ - return; - - IGB_CORE_LOCK(adapter); - index = (vtag >> 5) & 0x7F; - bit = vtag & 0x1F; - adapter->shadow_vfta[index] |= (1 << bit); - ++adapter->num_vlans; - /* Change hw filter setting */ - if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) - igb_setup_vlan_hw_support(adapter); - IGB_CORE_UNLOCK(adapter); -} - -/* - * This routine is run via an vlan - * unconfig EVENT - */ -static void -igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag) -{ - struct adapter *adapter = ifp->if_softc; - u32 index, bit; - - if (ifp->if_softc != arg) - return; - - if ((vtag == 0) || (vtag > 4095)) /* Invalid */ - return; - - IGB_CORE_LOCK(adapter); - index = (vtag >> 5) & 0x7F; - bit = vtag & 0x1F; - adapter->shadow_vfta[index] &= ~(1 << bit); - --adapter->num_vlans; - /* Change hw filter setting */ - if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) - igb_setup_vlan_hw_support(adapter); - IGB_CORE_UNLOCK(adapter); -} - -static void -igb_setup_vlan_hw_support(struct adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - struct ifnet *ifp = adapter->ifp; - u32 reg; - - if (adapter->vf_ifp) { - e1000_rlpml_set_vf(hw, - adapter->max_frame_size + VLAN_TAG_SIZE); - return; - } - - reg = E1000_READ_REG(hw, E1000_CTRL); - reg |= E1000_CTRL_VME; - E1000_WRITE_REG(hw, E1000_CTRL, reg); - - /* Enable the Filter Table */ - if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) { - reg = E1000_READ_REG(hw, E1000_RCTL); - reg &= ~E1000_RCTL_CFIEN; - reg |= E1000_RCTL_VFE; - E1000_WRITE_REG(hw, E1000_RCTL, reg); - } - - /* Update the frame 
size */ - E1000_WRITE_REG(&adapter->hw, E1000_RLPML, - adapter->max_frame_size + VLAN_TAG_SIZE); - - /* Don't bother with table if no vlans */ - if ((adapter->num_vlans == 0) || - ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0)) - return; - /* - ** A soft reset zero's out the VFTA, so - ** we need to repopulate it now. - */ - for (int i = 0; i < IGB_VFTA_SIZE; i++) - if (adapter->shadow_vfta[i] != 0) { - if (adapter->vf_ifp) - e1000_vfta_set_vf(hw, - adapter->shadow_vfta[i], TRUE); - else - e1000_write_vfta(hw, - i, adapter->shadow_vfta[i]); - } -} - -static void -igb_enable_intr(struct adapter *adapter) -{ - /* With RSS set up what to auto clear */ - if (adapter->msix_mem) { - u32 mask = (adapter->que_mask | adapter->link_mask); - E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask); - E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask); - E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask); - E1000_WRITE_REG(&adapter->hw, E1000_IMS, - E1000_IMS_LSC); - } else { - E1000_WRITE_REG(&adapter->hw, E1000_IMS, - IMS_ENABLE_MASK); - } - E1000_WRITE_FLUSH(&adapter->hw); - - return; -} - -static void -igb_disable_intr(struct adapter *adapter) -{ - if (adapter->msix_mem) { - E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0); - E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0); - } - E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0); - E1000_WRITE_FLUSH(&adapter->hw); - return; -} - -/* - * Bit of a misnomer, what this really means is - * to enable OS management of the system... 
aka - * to disable special hardware management features - */ -static void -igb_init_manageability(struct adapter *adapter) -{ - if (adapter->has_manage) { - int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H); - int manc = E1000_READ_REG(&adapter->hw, E1000_MANC); - - /* disable hardware interception of ARP */ - manc &= ~(E1000_MANC_ARP_EN); - - /* enable receiving management packets to the host */ - manc |= E1000_MANC_EN_MNG2HOST; - manc2h |= 1 << 5; /* Mng Port 623 */ - manc2h |= 1 << 6; /* Mng Port 664 */ - E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h); - E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); - } -} - -/* - * Give control back to hardware management - * controller if there is one. - */ -static void -igb_release_manageability(struct adapter *adapter) -{ - if (adapter->has_manage) { - int manc = E1000_READ_REG(&adapter->hw, E1000_MANC); - - /* re-enable hardware interception of ARP */ - manc |= E1000_MANC_ARP_EN; - manc &= ~E1000_MANC_EN_MNG2HOST; - - E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); - } -} - -/* - * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit. - * For ASF and Pass Through versions of f/w this means that - * the driver is loaded. - * - */ -static void -igb_get_hw_control(struct adapter *adapter) -{ - u32 ctrl_ext; - - if (adapter->vf_ifp) - return; - - /* Let firmware know the driver has taken over */ - ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); - E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, - ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); -} - -/* - * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit. - * For ASF and Pass Through versions of f/w this means that the - * driver is no longer loaded. 
- * - */ -static void -igb_release_hw_control(struct adapter *adapter) -{ - u32 ctrl_ext; - - if (adapter->vf_ifp) - return; - - /* Let firmware taken over control of h/w */ - ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); - E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, - ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD); -} - -static int -igb_is_valid_ether_addr(uint8_t *addr) -{ - char zero_addr[6] = { 0, 0, 0, 0, 0, 0 }; - - if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) { - return (FALSE); - } - - return (TRUE); -} - - -/* - * Enable PCI Wake On Lan capability - */ -static void -igb_enable_wakeup(device_t dev) -{ - u16 cap, status; - u8 id; - - /* First find the capabilities pointer*/ - cap = pci_read_config(dev, PCIR_CAP_PTR, 2); - /* Read the PM Capabilities */ - id = pci_read_config(dev, cap, 1); - if (id != PCIY_PMG) /* Something wrong */ - return; - /* OK, we have the power capabilities, so - now get the status register */ - cap += PCIR_POWER_STATUS; - status = pci_read_config(dev, cap, 2); - status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE; - pci_write_config(dev, cap, status, 2); - return; -} - -static void -igb_led_func(void *arg, int onoff) -{ - struct adapter *adapter = arg; - - IGB_CORE_LOCK(adapter); - if (onoff) { - e1000_setup_led(&adapter->hw); - e1000_led_on(&adapter->hw); - } else { - e1000_led_off(&adapter->hw); - e1000_cleanup_led(&adapter->hw); - } - IGB_CORE_UNLOCK(adapter); -} - -static uint64_t -igb_get_vf_counter(if_t ifp, ift_counter cnt) -{ - struct adapter *adapter; - struct e1000_vf_stats *stats; -#ifndef IGB_LEGACY_TX - struct tx_ring *txr; - uint64_t rv; -#endif - - adapter = if_getsoftc(ifp); - stats = (struct e1000_vf_stats *)adapter->stats; - - switch (cnt) { - case IFCOUNTER_IPACKETS: - return (stats->gprc); - case IFCOUNTER_OPACKETS: - return (stats->gptc); - case IFCOUNTER_IBYTES: - return (stats->gorc); - case IFCOUNTER_OBYTES: - return (stats->gotc); - case IFCOUNTER_IMCASTS: - return (stats->mprc); - case 
IFCOUNTER_IERRORS: - return (adapter->dropped_pkts); - case IFCOUNTER_OERRORS: - return (adapter->watchdog_events); -#ifndef IGB_LEGACY_TX - case IFCOUNTER_OQDROPS: - rv = 0; - txr = adapter->tx_rings; - for (int i = 0; i < adapter->num_queues; i++, txr++) - rv += txr->br->br_drops; - return (rv); -#endif - default: - return (if_get_counter_default(ifp, cnt)); - } -} - -static uint64_t -igb_get_counter(if_t ifp, ift_counter cnt) -{ - struct adapter *adapter; - struct e1000_hw_stats *stats; -#ifndef IGB_LEGACY_TX - struct tx_ring *txr; - uint64_t rv; -#endif - - adapter = if_getsoftc(ifp); - if (adapter->vf_ifp) - return (igb_get_vf_counter(ifp, cnt)); - - stats = (struct e1000_hw_stats *)adapter->stats; - - switch (cnt) { - case IFCOUNTER_IPACKETS: - return (stats->gprc); - case IFCOUNTER_OPACKETS: - return (stats->gptc); - case IFCOUNTER_IBYTES: - return (stats->gorc); - case IFCOUNTER_OBYTES: - return (stats->gotc); - case IFCOUNTER_IMCASTS: - return (stats->mprc); - case IFCOUNTER_OMCASTS: - return (stats->mptc); - case IFCOUNTER_IERRORS: - return (adapter->dropped_pkts + stats->rxerrc + - stats->crcerrs + stats->algnerrc + - stats->ruc + stats->roc + stats->cexterr); - case IFCOUNTER_OERRORS: - return (stats->ecol + stats->latecol + - adapter->watchdog_events); - case IFCOUNTER_COLLISIONS: - return (stats->colc); - case IFCOUNTER_IQDROPS: - return (stats->mpc); -#ifndef IGB_LEGACY_TX - case IFCOUNTER_OQDROPS: - rv = 0; - txr = adapter->tx_rings; - for (int i = 0; i < adapter->num_queues; i++, txr++) - rv += txr->br->br_drops; - return (rv); -#endif - default: - return (if_get_counter_default(ifp, cnt)); - } -} - -/********************************************************************** - * - * Update the board statistics counters. 
- * - **********************************************************************/ -static void -igb_update_stats_counters(struct adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - struct e1000_hw_stats *stats; - - /* - ** The virtual function adapter has only a - ** small controlled set of stats, do only - ** those and return. - */ - if (adapter->vf_ifp) { - igb_update_vf_stats_counters(adapter); - return; - } - - stats = (struct e1000_hw_stats *)adapter->stats; - - if (adapter->hw.phy.media_type == e1000_media_type_copper || - (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) { - stats->symerrs += - E1000_READ_REG(hw,E1000_SYMERRS); - stats->sec += E1000_READ_REG(hw, E1000_SEC); - } - - stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS); - stats->mpc += E1000_READ_REG(hw, E1000_MPC); - stats->scc += E1000_READ_REG(hw, E1000_SCC); - stats->ecol += E1000_READ_REG(hw, E1000_ECOL); - - stats->mcc += E1000_READ_REG(hw, E1000_MCC); - stats->latecol += E1000_READ_REG(hw, E1000_LATECOL); - stats->colc += E1000_READ_REG(hw, E1000_COLC); - stats->dc += E1000_READ_REG(hw, E1000_DC); - stats->rlec += E1000_READ_REG(hw, E1000_RLEC); - stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC); - stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC); - /* - ** For watchdog management we need to know if we have been - ** paused during the last interval, so capture that here. 
- */ - adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC); - stats->xoffrxc += adapter->pause_frames; - stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC); - stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC); - stats->prc64 += E1000_READ_REG(hw, E1000_PRC64); - stats->prc127 += E1000_READ_REG(hw, E1000_PRC127); - stats->prc255 += E1000_READ_REG(hw, E1000_PRC255); - stats->prc511 += E1000_READ_REG(hw, E1000_PRC511); - stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023); - stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522); - stats->gprc += E1000_READ_REG(hw, E1000_GPRC); - stats->bprc += E1000_READ_REG(hw, E1000_BPRC); - stats->mprc += E1000_READ_REG(hw, E1000_MPRC); - stats->gptc += E1000_READ_REG(hw, E1000_GPTC); - - /* For the 64-bit byte counters the low dword must be read first. */ - /* Both registers clear on the read of the high dword */ - - stats->gorc += E1000_READ_REG(hw, E1000_GORCL) + - ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32); - stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) + - ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32); - - stats->rnbc += E1000_READ_REG(hw, E1000_RNBC); - stats->ruc += E1000_READ_REG(hw, E1000_RUC); - stats->rfc += E1000_READ_REG(hw, E1000_RFC); - stats->roc += E1000_READ_REG(hw, E1000_ROC); - stats->rjc += E1000_READ_REG(hw, E1000_RJC); - - stats->mgprc += E1000_READ_REG(hw, E1000_MGTPRC); - stats->mgpdc += E1000_READ_REG(hw, E1000_MGTPDC); - stats->mgptc += E1000_READ_REG(hw, E1000_MGTPTC); - - stats->tor += E1000_READ_REG(hw, E1000_TORL) + - ((u64)E1000_READ_REG(hw, E1000_TORH) << 32); - stats->tot += E1000_READ_REG(hw, E1000_TOTL) + - ((u64)E1000_READ_REG(hw, E1000_TOTH) << 32); - - stats->tpr += E1000_READ_REG(hw, E1000_TPR); - stats->tpt += E1000_READ_REG(hw, E1000_TPT); - stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64); - stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127); - stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255); - stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511); - stats->ptc1023 += 
E1000_READ_REG(hw, E1000_PTC1023); - stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522); - stats->mptc += E1000_READ_REG(hw, E1000_MPTC); - stats->bptc += E1000_READ_REG(hw, E1000_BPTC); - - /* Interrupt Counts */ - - stats->iac += E1000_READ_REG(hw, E1000_IAC); - stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC); - stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC); - stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC); - stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC); - stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC); - stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC); - stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC); - stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC); - - /* Host to Card Statistics */ - - stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC); - stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC); - stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC); - stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC); - stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC); - stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC); - stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC); - stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) + - ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32)); - stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) + - ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32)); - stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS); - stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC); - stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC); - - stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC); - stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC); - stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS); - stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR); - stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC); - stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC); - - /* Driver specific counters */ - adapter->device_control = E1000_READ_REG(hw, E1000_CTRL); - adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL); - adapter->int_mask = E1000_READ_REG(hw, 
E1000_IMS); - adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS); - adapter->packet_buf_alloc_tx = - ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16); - adapter->packet_buf_alloc_rx = - (E1000_READ_REG(hw, E1000_PBA) & 0xffff); -} - - -/********************************************************************** - * - * Initialize the VF board statistics counters. - * - **********************************************************************/ -static void -igb_vf_init_stats(struct adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - struct e1000_vf_stats *stats; - - stats = (struct e1000_vf_stats *)adapter->stats; - if (stats == NULL) - return; - stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC); - stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC); - stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC); - stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC); - stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC); -} - -/********************************************************************** - * - * Update the VF board statistics counters. - * - **********************************************************************/ -static void -igb_update_vf_stats_counters(struct adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - struct e1000_vf_stats *stats; - - if (adapter->link_speed == 0) - return; - - stats = (struct e1000_vf_stats *)adapter->stats; - - UPDATE_VF_REG(E1000_VFGPRC, - stats->last_gprc, stats->gprc); - UPDATE_VF_REG(E1000_VFGORC, - stats->last_gorc, stats->gorc); - UPDATE_VF_REG(E1000_VFGPTC, - stats->last_gptc, stats->gptc); - UPDATE_VF_REG(E1000_VFGOTC, - stats->last_gotc, stats->gotc); - UPDATE_VF_REG(E1000_VFMPRC, - stats->last_mprc, stats->mprc); -} - -/* Export a single 32-bit register via a read-only sysctl. 
*/ -static int -igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS) -{ - struct adapter *adapter; - u_int val; - - adapter = oidp->oid_arg1; - val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2); - return (sysctl_handle_int(oidp, &val, 0, req)); -} - -/* -** Tuneable interrupt rate handler -*/ -static int -igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS) -{ - struct igb_queue *que = ((struct igb_queue *)oidp->oid_arg1); - int error; - u32 reg, usec, rate; - - reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix)); - usec = ((reg & 0x7FFC) >> 2); - if (usec > 0) - rate = 1000000 / usec; - else - rate = 0; - error = sysctl_handle_int(oidp, &rate, 0, req); - if (error || !req->newptr) - return error; - return 0; -} - -/* - * Add sysctl variables, one per statistic, to the system. - */ -static void -igb_add_hw_stats(struct adapter *adapter) -{ - device_t dev = adapter->dev; - - struct tx_ring *txr = adapter->tx_rings; - struct rx_ring *rxr = adapter->rx_rings; - - struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); - struct sysctl_oid *tree = device_get_sysctl_tree(dev); - struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); - struct e1000_hw_stats *stats = adapter->stats; - - struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node; - struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list; - -#define QUEUE_NAME_LEN 32 - char namebuf[QUEUE_NAME_LEN]; - - /* Driver Statistics */ - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", - CTLFLAG_RD, &adapter->dropped_pkts, - "Driver dropped packets"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq", - CTLFLAG_RD, &adapter->link_irq, - "Link MSIX IRQ Handled"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail", - CTLFLAG_RD, &adapter->mbuf_defrag_failed, - "Defragmenting mbuf chain failed"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", - CTLFLAG_RD, &adapter->no_tx_dma_setup, - "Driver tx dma failure in xmit"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns", 
- CTLFLAG_RD, &adapter->rx_overruns, - "RX overruns"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts", - CTLFLAG_RD, &adapter->watchdog_events, - "Watchdog timeouts"); - - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control", - CTLFLAG_RD, &adapter->device_control, - "Device Control Register"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control", - CTLFLAG_RD, &adapter->rx_control, - "Receiver Control Register"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask", - CTLFLAG_RD, &adapter->int_mask, - "Interrupt Mask"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask", - CTLFLAG_RD, &adapter->eint_mask, - "Extended Interrupt Mask"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc", - CTLFLAG_RD, &adapter->packet_buf_alloc_tx, - "Transmit Buffer Packet Allocation"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc", - CTLFLAG_RD, &adapter->packet_buf_alloc_rx, - "Receive Buffer Packet Allocation"); - SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water", - CTLFLAG_RD, &adapter->hw.fc.high_water, 0, - "Flow Control High Watermark"); - SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", - CTLFLAG_RD, &adapter->hw.fc.low_water, 0, - "Flow Control Low Watermark"); - - for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) { - struct lro_ctrl *lro = &rxr->lro; - - snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i); - queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, - CTLFLAG_RD, NULL, "Queue Name"); - queue_list = SYSCTL_CHILDREN(queue_node); - - SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate", - CTLTYPE_UINT | CTLFLAG_RD, &adapter->queues[i], - sizeof(&adapter->queues[i]), - igb_sysctl_interrupt_rate_handler, - "IU", "Interrupt Rate"); - - SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", - CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDH(txr->me), - igb_sysctl_reg_handler, "IU", - "Transmit Descriptor Head"); - SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", - CTLTYPE_UINT | 
CTLFLAG_RD, adapter, E1000_TDT(txr->me), - igb_sysctl_reg_handler, "IU", - "Transmit Descriptor Tail"); - SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail", - CTLFLAG_RD, &txr->no_desc_avail, - "Queue Descriptors Unavailable"); - SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets", - CTLFLAG_RD, &txr->total_packets, - "Queue Packets Transmitted"); - - SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", - CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDH(rxr->me), - igb_sysctl_reg_handler, "IU", - "Receive Descriptor Head"); - SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", - CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDT(rxr->me), - igb_sysctl_reg_handler, "IU", - "Receive Descriptor Tail"); - SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets", - CTLFLAG_RD, &rxr->rx_packets, - "Queue Packets Received"); - SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes", - CTLFLAG_RD, &rxr->rx_bytes, - "Queue Bytes Received"); - SYSCTL_ADD_U64(ctx, queue_list, OID_AUTO, "lro_queued", - CTLFLAG_RD, &lro->lro_queued, 0, - "LRO Queued"); - SYSCTL_ADD_U64(ctx, queue_list, OID_AUTO, "lro_flushed", - CTLFLAG_RD, &lro->lro_flushed, 0, - "LRO Flushed"); - } - - /* MAC stats get their own sub node */ - - stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", - CTLFLAG_RD, NULL, "MAC Statistics"); - stat_list = SYSCTL_CHILDREN(stat_node); - - /* - ** VF adapter has a very limited set of stats - ** since its not managing the metal, so to speak. 
- */ - if (adapter->vf_ifp) { - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd", - CTLFLAG_RD, &stats->gprc, - "Good Packets Received"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd", - CTLFLAG_RD, &stats->gptc, - "Good Packets Transmitted"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", - CTLFLAG_RD, &stats->gorc, - "Good Octets Received"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", - CTLFLAG_RD, &stats->gotc, - "Good Octets Transmitted"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd", - CTLFLAG_RD, &stats->mprc, - "Multicast Packets Received"); - return; - } - - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll", - CTLFLAG_RD, &stats->ecol, - "Excessive collisions"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll", - CTLFLAG_RD, &stats->scc, - "Single collisions"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll", - CTLFLAG_RD, &stats->mcc, - "Multiple collisions"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll", - CTLFLAG_RD, &stats->latecol, - "Late collisions"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count", - CTLFLAG_RD, &stats->colc, - "Collision Count"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors", - CTLFLAG_RD, &stats->symerrs, - "Symbol Errors"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors", - CTLFLAG_RD, &stats->sec, - "Sequence Errors"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count", - CTLFLAG_RD, &stats->dc, - "Defer Count"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets", - CTLFLAG_RD, &stats->mpc, - "Missed Packets"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_length_errors", - CTLFLAG_RD, &stats->rlec, - "Receive Length Errors"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff", - CTLFLAG_RD, &stats->rnbc, - "Receive No Buffers"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize", - CTLFLAG_RD, &stats->ruc, - "Receive Undersize"); - 
SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented", - CTLFLAG_RD, &stats->rfc, - "Fragmented Packets Received"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize", - CTLFLAG_RD, &stats->roc, - "Oversized Packets Received"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber", - CTLFLAG_RD, &stats->rjc, - "Recevied Jabber"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs", - CTLFLAG_RD, &stats->rxerrc, - "Receive Errors"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs", - CTLFLAG_RD, &stats->crcerrs, - "CRC errors"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs", - CTLFLAG_RD, &stats->algnerrc, - "Alignment Errors"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_no_crs", - CTLFLAG_RD, &stats->tncrs, - "Transmit with No CRS"); - /* On 82575 these are collision counts */ - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs", - CTLFLAG_RD, &stats->cexterr, - "Collision/Carrier extension errors"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd", - CTLFLAG_RD, &stats->xonrxc, - "XON Received"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd", - CTLFLAG_RD, &stats->xontxc, - "XON Transmitted"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd", - CTLFLAG_RD, &stats->xoffrxc, - "XOFF Received"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd", - CTLFLAG_RD, &stats->xofftxc, - "XOFF Transmitted"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "unsupported_fc_recvd", - CTLFLAG_RD, &stats->fcruc, - "Unsupported Flow Control Received"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_recvd", - CTLFLAG_RD, &stats->mgprc, - "Management Packets Received"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_drop", - CTLFLAG_RD, &stats->mgpdc, - "Management Packets Dropped"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_txd", - CTLFLAG_RD, &stats->mgptc, - "Management Packets Transmitted"); - /* Packet Reception Stats */ - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, 
"total_pkts_recvd", - CTLFLAG_RD, &stats->tpr, - "Total Packets Received"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd", - CTLFLAG_RD, &stats->gprc, - "Good Packets Received"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd", - CTLFLAG_RD, &stats->bprc, - "Broadcast Packets Received"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd", - CTLFLAG_RD, &stats->mprc, - "Multicast Packets Received"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64", - CTLFLAG_RD, &stats->prc64, - "64 byte frames received"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127", - CTLFLAG_RD, &stats->prc127, - "65-127 byte frames received"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255", - CTLFLAG_RD, &stats->prc255, - "128-255 byte frames received"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511", - CTLFLAG_RD, &stats->prc511, - "256-511 byte frames received"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023", - CTLFLAG_RD, &stats->prc1023, - "512-1023 byte frames received"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522", - CTLFLAG_RD, &stats->prc1522, - "1023-1522 byte frames received"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", - CTLFLAG_RD, &stats->gorc, - "Good Octets Received"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_recvd", - CTLFLAG_RD, &stats->tor, - "Total Octets Received"); - - /* Packet Transmission Stats */ - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", - CTLFLAG_RD, &stats->gotc, - "Good Octets Transmitted"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_txd", - CTLFLAG_RD, &stats->tot, - "Total Octets Transmitted"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd", - CTLFLAG_RD, &stats->tpt, - "Total Packets Transmitted"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd", - CTLFLAG_RD, &stats->gptc, - "Good Packets Transmitted"); - 
SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd", - CTLFLAG_RD, &stats->bptc, - "Broadcast Packets Transmitted"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd", - CTLFLAG_RD, &stats->mptc, - "Multicast Packets Transmitted"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64", - CTLFLAG_RD, &stats->ptc64, - "64 byte frames transmitted"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127", - CTLFLAG_RD, &stats->ptc127, - "65-127 byte frames transmitted"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255", - CTLFLAG_RD, &stats->ptc255, - "128-255 byte frames transmitted"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511", - CTLFLAG_RD, &stats->ptc511, - "256-511 byte frames transmitted"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023", - CTLFLAG_RD, &stats->ptc1023, - "512-1023 byte frames transmitted"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522", - CTLFLAG_RD, &stats->ptc1522, - "1024-1522 byte frames transmitted"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd", - CTLFLAG_RD, &stats->tsctc, - "TSO Contexts Transmitted"); - SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail", - CTLFLAG_RD, &stats->tsctfc, - "TSO Contexts Failed"); - - - /* Interrupt Stats */ - - int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", - CTLFLAG_RD, NULL, "Interrupt Statistics"); - int_list = SYSCTL_CHILDREN(int_node); - - SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts", - CTLFLAG_RD, &stats->iac, - "Interrupt Assertion Count"); - - SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer", - CTLFLAG_RD, &stats->icrxptc, - "Interrupt Cause Rx Pkt Timer Expire Count"); - - SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer", - CTLFLAG_RD, &stats->icrxatc, - "Interrupt Cause Rx Abs Timer Expire Count"); - - SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer", - CTLFLAG_RD, &stats->ictxptc, - "Interrupt Cause Tx Pkt Timer Expire Count"); - - 
SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer", - CTLFLAG_RD, &stats->ictxatc, - "Interrupt Cause Tx Abs Timer Expire Count"); - - SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty", - CTLFLAG_RD, &stats->ictxqec, - "Interrupt Cause Tx Queue Empty Count"); - - SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh", - CTLFLAG_RD, &stats->ictxqmtc, - "Interrupt Cause Tx Queue Min Thresh Count"); - - SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh", - CTLFLAG_RD, &stats->icrxdmtc, - "Interrupt Cause Rx Desc Min Thresh Count"); - - SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun", - CTLFLAG_RD, &stats->icrxoc, - "Interrupt Cause Receiver Overrun Count"); - - /* Host to Card Stats */ - - host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host", - CTLFLAG_RD, NULL, - "Host to Card Statistics"); - - host_list = SYSCTL_CHILDREN(host_node); - - SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt", - CTLFLAG_RD, &stats->cbtmpc, - "Circuit Breaker Tx Packet Count"); - - SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard", - CTLFLAG_RD, &stats->htdpmc, - "Host Transmit Discarded Packets"); - - SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt", - CTLFLAG_RD, &stats->rpthc, - "Rx Packets To Host"); - - SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts", - CTLFLAG_RD, &stats->cbrmpc, - "Circuit Breaker Rx Packet Count"); - - SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop", - CTLFLAG_RD, &stats->cbrdpc, - "Circuit Breaker Rx Dropped Count"); - - SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt", - CTLFLAG_RD, &stats->hgptc, - "Host Good Packets Tx Count"); - - SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop", - CTLFLAG_RD, &stats->htcbdpc, - "Host Tx Circuit Breaker Dropped Count"); - - SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes", - CTLFLAG_RD, &stats->hgorc, - "Host Good Octets Received Count"); - - SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes", - 
CTLFLAG_RD, &stats->hgotc, - "Host Good Octets Transmit Count"); - - SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors", - CTLFLAG_RD, &stats->lenerrs, - "Length Errors"); - - SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt", - CTLFLAG_RD, &stats->scvpc, - "SerDes/SGMII Code Violation Pkt Count"); - - SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed", - CTLFLAG_RD, &stats->hrmpc, - "Header Redirection Missed Packet Count"); -} - - -/********************************************************************** - * - * This routine provides a way to dump out the adapter eeprom, - * often a useful debug/service tool. This only dumps the first - * 32 words, stuff that matters is in that extent. - * - **********************************************************************/ -static int -igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS) -{ - struct adapter *adapter; - int error; - int result; - - result = -1; - error = sysctl_handle_int(oidp, &result, 0, req); - - if (error || !req->newptr) - return (error); - - /* - * This value will cause a hex dump of the - * first 32 16-bit words of the EEPROM to - * the screen. 
- */ - if (result == 1) { - adapter = (struct adapter *)arg1; - igb_print_nvm_info(adapter); - } - - return (error); -} - -static void -igb_print_nvm_info(struct adapter *adapter) -{ - u16 eeprom_data; - int i, j, row = 0; - - /* Its a bit crude, but it gets the job done */ - printf("\nInterface EEPROM Dump:\n"); - printf("Offset\n0x0000 "); - for (i = 0, j = 0; i < 32; i++, j++) { - if (j == 8) { /* Make the offset block */ - j = 0; ++row; - printf("\n0x00%x0 ",row); - } - e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data); - printf("%04x ", eeprom_data); - } - printf("\n"); -} - -static void -igb_set_sysctl_value(struct adapter *adapter, const char *name, - const char *description, int *limit, int value) -{ - *limit = value; - SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev), - SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), - OID_AUTO, name, CTLFLAG_RW, limit, value, description); -} - -/* -** Set flow control using sysctl: -** Flow control values: -** 0 - off -** 1 - rx pause -** 2 - tx pause -** 3 - full -*/ -static int -igb_set_flowcntl(SYSCTL_HANDLER_ARGS) -{ - int error; - static int input = 3; /* default is full */ - struct adapter *adapter = (struct adapter *) arg1; - - error = sysctl_handle_int(oidp, &input, 0, req); - - if ((error) || (req->newptr == NULL)) - return (error); - - switch (input) { - case e1000_fc_rx_pause: - case e1000_fc_tx_pause: - case e1000_fc_full: - case e1000_fc_none: - adapter->hw.fc.requested_mode = input; - adapter->fc = input; - break; - default: - /* Do nothing */ - return (error); - } - - adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode; - e1000_force_mac_fc(&adapter->hw); - /* XXX TODO: update DROP_EN on each RX queue if appropriate */ - return (error); -} - -/* -** Manage DMA Coalesce: -** Control values: -** 0/1 - off/on -** Legal timer values are: -** 250,500,1000-10000 in thousands -*/ -static int -igb_sysctl_dmac(SYSCTL_HANDLER_ARGS) -{ - struct adapter *adapter = (struct adapter *) arg1; - int error; 
- - error = sysctl_handle_int(oidp, &adapter->dmac, 0, req); - - if ((error) || (req->newptr == NULL)) - return (error); - - switch (adapter->dmac) { - case 0: - /* Disabling */ - break; - case 1: /* Just enable and use default */ - adapter->dmac = 1000; - break; - case 250: - case 500: - case 1000: - case 2000: - case 3000: - case 4000: - case 5000: - case 6000: - case 7000: - case 8000: - case 9000: - case 10000: - /* Legal values - allow */ - break; - default: - /* Do nothing, illegal value */ - adapter->dmac = 0; - return (EINVAL); - } - /* Reinit the interface */ - igb_init(adapter); - return (error); -} - -/* -** Manage Energy Efficient Ethernet: -** Control values: -** 0/1 - enabled/disabled -*/ -static int -igb_sysctl_eee(SYSCTL_HANDLER_ARGS) -{ - struct adapter *adapter = (struct adapter *) arg1; - int error, value; - - value = adapter->hw.dev_spec._82575.eee_disable; - error = sysctl_handle_int(oidp, &value, 0, req); - if (error || req->newptr == NULL) - return (error); - IGB_CORE_LOCK(adapter); - adapter->hw.dev_spec._82575.eee_disable = (value != 0); - igb_init_locked(adapter); - IGB_CORE_UNLOCK(adapter); - return (0); -} diff --git a/freebsd/sys/dev/e1000/if_igb.h b/freebsd/sys/dev/e1000/if_igb.h deleted file mode 100644 index ea5ba649..00000000 --- a/freebsd/sys/dev/e1000/if_igb.h +++ /dev/null @@ -1,634 +0,0 @@ -/****************************************************************************** - - Copyright (c) 2001-2015, Intel Corporation - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- - 3. Neither the name of the Intel Corporation nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE. - -******************************************************************************/ -/*$FreeBSD$*/ - -#ifndef _IF_IGB_H_ -#define _IF_IGB_H_ - -#ifdef ALTQ -#define IGB_LEGACY_TX -#endif - -#include -#include -#ifndef IGB_LEGACY_TX -#include -#endif -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#ifdef RSS -#include -#include -#endif - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "e1000_api.h" -#include "e1000_82575.h" - -/* Tunables */ -/* - * IGB_TXD: Maximum number of Transmit Descriptors - * - * This value is the number of transmit descriptors allocated by the driver. - * Increasing this value allows the driver to queue more transmits. Each - * descriptor is 16 bytes. 
- * Since TDLEN should be multiple of 128bytes, the number of transmit - * desscriptors should meet the following condition. - * (num_tx_desc * sizeof(struct e1000_tx_desc)) % 128 == 0 - */ -#define IGB_MIN_TXD 256 -#define IGB_DEFAULT_TXD 1024 -#define IGB_MAX_TXD 4096 - -/* - * IGB_RXD: Maximum number of Receive Descriptors - * - * This value is the number of receive descriptors allocated by the driver. - * Increasing this value allows the driver to buffer more incoming packets. - * Each descriptor is 16 bytes. A receive buffer is also allocated for each - * descriptor. The maximum MTU size is 16110. - * Since TDLEN should be multiple of 128bytes, the number of transmit - * desscriptors should meet the following condition. - * (num_tx_desc * sizeof(struct e1000_tx_desc)) % 128 == 0 - */ -#define IGB_MIN_RXD 256 -#define IGB_DEFAULT_RXD 1024 -#define IGB_MAX_RXD 4096 - -/* - * IGB_TIDV - Transmit Interrupt Delay Value - * Valid Range: 0-65535 (0=off) - * Default Value: 64 - * This value delays the generation of transmit interrupts in units of - * 1.024 microseconds. Transmit interrupt reduction can improve CPU - * efficiency if properly tuned for specific network traffic. If the - * system is reporting dropped transmits, this value may be set too high - * causing the driver to run out of available transmit descriptors. - */ -#define IGB_TIDV 64 - -/* - * IGB_TADV - Transmit Absolute Interrupt Delay Value - * Valid Range: 0-65535 (0=off) - * Default Value: 64 - * This value, in units of 1.024 microseconds, limits the delay in which a - * transmit interrupt is generated. Useful only if IGB_TIDV is non-zero, - * this value ensures that an interrupt is generated after the initial - * packet is sent on the wire within the set amount of time. Proper tuning, - * along with IGB_TIDV, may improve traffic throughput in specific - * network conditions. 
- */ -#define IGB_TADV 64 - -/* - * IGB_RDTR - Receive Interrupt Delay Timer (Packet Timer) - * Valid Range: 0-65535 (0=off) - * Default Value: 0 - * This value delays the generation of receive interrupts in units of 1.024 - * microseconds. Receive interrupt reduction can improve CPU efficiency if - * properly tuned for specific network traffic. Increasing this value adds - * extra latency to frame reception and can end up decreasing the throughput - * of TCP traffic. If the system is reporting dropped receives, this value - * may be set too high, causing the driver to run out of available receive - * descriptors. - * - * CAUTION: When setting IGB_RDTR to a value other than 0, adapters - * may hang (stop transmitting) under certain network conditions. - * If this occurs a WATCHDOG message is logged in the system - * event log. In addition, the controller is automatically reset, - * restoring the network connection. To eliminate the potential - * for the hang ensure that IGB_RDTR is set to 0. - */ -#define IGB_RDTR 0 - -/* - * Receive Interrupt Absolute Delay Timer (Not valid for 82542/82543/82544) - * Valid Range: 0-65535 (0=off) - * Default Value: 64 - * This value, in units of 1.024 microseconds, limits the delay in which a - * receive interrupt is generated. Useful only if IGB_RDTR is non-zero, - * this value ensures that an interrupt is generated after the initial - * packet is received within the set amount of time. Proper tuning, - * along with IGB_RDTR, may improve traffic throughput in specific network - * conditions. - */ -#define IGB_RADV 64 - -/* - * This parameter controls the duration of transmit watchdog timer. - */ -#define IGB_WATCHDOG (10 * hz) - -/* - * This parameter controls when the driver calls the routine to reclaim - * transmit descriptors. Cleaning earlier seems a win. - */ -#define IGB_TX_CLEANUP_THRESHOLD (adapter->num_tx_desc / 2) - -/* - * This parameter controls whether or not autonegotation is enabled. 
- * 0 - Disable autonegotiation - * 1 - Enable autonegotiation - */ -#define DO_AUTO_NEG 1 - -/* - * This parameter control whether or not the driver will wait for - * autonegotiation to complete. - * 1 - Wait for autonegotiation to complete - * 0 - Don't wait for autonegotiation to complete - */ -#define WAIT_FOR_AUTO_NEG_DEFAULT 0 - -/* Tunables -- End */ - -#define AUTONEG_ADV_DEFAULT (ADVERTISE_10_HALF | ADVERTISE_10_FULL | \ - ADVERTISE_100_HALF | ADVERTISE_100_FULL | \ - ADVERTISE_1000_FULL) - -#define AUTO_ALL_MODES 0 - -/* PHY master/slave setting */ -#define IGB_MASTER_SLAVE e1000_ms_hw_default - -/* Support AutoMediaDetect for Marvell M88 PHY in i354 */ -#define IGB_MEDIA_RESET (1 << 0) - -/* - * Micellaneous constants - */ -#define IGB_INTEL_VENDOR_ID 0x8086 - -#define IGB_JUMBO_PBA 0x00000028 -#define IGB_DEFAULT_PBA 0x00000030 -#define IGB_SMARTSPEED_DOWNSHIFT 3 -#define IGB_SMARTSPEED_MAX 15 -#define IGB_MAX_LOOP 10 - -#define IGB_RX_PTHRESH ((hw->mac.type == e1000_i354) ? 12 : \ - ((hw->mac.type <= e1000_82576) ? 16 : 8)) -#define IGB_RX_HTHRESH 8 -#define IGB_RX_WTHRESH ((hw->mac.type == e1000_82576 && \ - adapter->msix_mem) ? 1 : 4) - -#define IGB_TX_PTHRESH ((hw->mac.type == e1000_i354) ? 20 : 8) -#define IGB_TX_HTHRESH 1 -#define IGB_TX_WTHRESH ((hw->mac.type != e1000_82575 && \ - adapter->msix_mem) ? 1 : 16) - -#define MAX_NUM_MULTICAST_ADDRESSES 128 -#define PCI_ANY_ID (~0U) -#define ETHER_ALIGN 2 -#define IGB_TX_BUFFER_SIZE ((uint32_t) 1514) -#define IGB_FC_PAUSE_TIME 0x0680 -#define IGB_EEPROM_APME 0x400; -/* Queue minimum free for use */ -#define IGB_QUEUE_THRESHOLD (adapter->num_tx_desc / 8) - -/* - * TDBA/RDBA should be aligned on 16 byte boundary. But TDLEN/RDLEN should be - * multiple of 128 bytes. So we align TDBA/RDBA on 128 byte boundary. This will - * also optimize cache line size effect. H/W supports up to cache line size 128. 
- */ -#define IGB_DBA_ALIGN 128 - -#define SPEED_MODE_BIT (1<<21) /* On PCI-E MACs only */ - -/* PCI Config defines */ -#define IGB_MSIX_BAR 3 - -/* Defines for printing debug information */ -#define DEBUG_INIT 0 -#define DEBUG_IOCTL 0 -#define DEBUG_HW 0 - -#define INIT_DEBUGOUT(S) if (DEBUG_INIT) printf(S "\n") -#define INIT_DEBUGOUT1(S, A) if (DEBUG_INIT) printf(S "\n", A) -#define INIT_DEBUGOUT2(S, A, B) if (DEBUG_INIT) printf(S "\n", A, B) -#define IOCTL_DEBUGOUT(S) if (DEBUG_IOCTL) printf(S "\n") -#define IOCTL_DEBUGOUT1(S, A) if (DEBUG_IOCTL) printf(S "\n", A) -#define IOCTL_DEBUGOUT2(S, A, B) if (DEBUG_IOCTL) printf(S "\n", A, B) -#define HW_DEBUGOUT(S) if (DEBUG_HW) printf(S "\n") -#define HW_DEBUGOUT1(S, A) if (DEBUG_HW) printf(S "\n", A) -#define HW_DEBUGOUT2(S, A, B) if (DEBUG_HW) printf(S "\n", A, B) - -#define IGB_MAX_SCATTER 40 -#define IGB_VFTA_SIZE 128 -#define IGB_BR_SIZE 4096 /* ring buf size */ -#define IGB_TSO_SIZE (65535 + sizeof(struct ether_vlan_header)) -#define IGB_TSO_SEG_SIZE 4096 /* Max dma segment size */ -#define IGB_TXPBSIZE 20408 -#define IGB_HDR_BUF 128 -#define IGB_PKTTYPE_MASK 0x0000FFF0 -#define IGB_DMCTLX_DCFLUSH_DIS 0x80000000 /* Disable DMA Coalesce Flush */ -#define ETH_ZLEN 60 -#define ETH_ADDR_LEN 6 - -/* Offload bits in mbuf flag */ -#if __FreeBSD_version >= 1000000 -#define CSUM_OFFLOAD_IPV4 (CSUM_IP|CSUM_IP_TCP|CSUM_IP_UDP|CSUM_IP_SCTP) -#define CSUM_OFFLOAD_IPV6 (CSUM_IP6_TCP|CSUM_IP6_UDP|CSUM_IP6_SCTP) -#define CSUM_OFFLOAD (CSUM_OFFLOAD_IPV4|CSUM_OFFLOAD_IPV6) -#elif __FreeBSD_version >= 800000 -#define CSUM_OFFLOAD (CSUM_IP|CSUM_TCP|CSUM_UDP|CSUM_SCTP) -#else -#define CSUM_OFFLOAD (CSUM_IP|CSUM_TCP|CSUM_UDP) -#endif - -/* Define the starting Interrupt rate per Queue */ -#define IGB_INTS_PER_SEC 8000 -#define IGB_DEFAULT_ITR ((1000000/IGB_INTS_PER_SEC) << 2) - -#define IGB_LINK_ITR 2000 -#define I210_LINK_DELAY 1000 - -/* Precision Time Sync (IEEE 1588) defines */ -#define ETHERTYPE_IEEE1588 0x88F7 -#define 
PICOSECS_PER_TICK 20833 -#define TSYNC_PORT 319 /* UDP port for the protocol */ - -/* - * Bus dma allocation structure used by - * e1000_dma_malloc and e1000_dma_free. - */ -struct igb_dma_alloc { - bus_addr_t dma_paddr; - caddr_t dma_vaddr; - bus_dma_tag_t dma_tag; - bus_dmamap_t dma_map; - bus_dma_segment_t dma_seg; - int dma_nseg; -}; - - -/* -** Driver queue struct: this is the interrupt container -** for the associated tx and rx ring. -*/ -struct igb_queue { - struct adapter *adapter; - u32 msix; /* This queue's MSIX vector */ - u32 eims; /* This queue's EIMS bit */ - u32 eitr_setting; - struct resource *res; - void *tag; - struct tx_ring *txr; - struct rx_ring *rxr; - struct task que_task; - struct taskqueue *tq; - u64 irqs; -}; - -/* - * The transmit ring, one per queue - */ -struct tx_ring { - struct adapter *adapter; - struct mtx tx_mtx; - u32 me; - int watchdog_time; - union e1000_adv_tx_desc *tx_base; - struct igb_tx_buf *tx_buffers; - struct igb_dma_alloc txdma; - volatile u16 tx_avail; - u16 next_avail_desc; - u16 next_to_clean; - u16 num_desc; - enum { - IGB_QUEUE_IDLE = 1, - IGB_QUEUE_WORKING = 2, - IGB_QUEUE_HUNG = 4, - IGB_QUEUE_DEPLETED = 8, - } queue_status; - u32 txd_cmd; - bus_dma_tag_t txtag; - char mtx_name[16]; -#ifndef IGB_LEGACY_TX - struct buf_ring *br; - struct task txq_task; -#endif - u32 bytes; /* used for AIM */ - u32 packets; - /* Soft Stats */ - unsigned long tso_tx; - unsigned long no_tx_map_avail; - unsigned long no_tx_dma_setup; - u64 no_desc_avail; - u64 total_packets; -}; - -/* - * Receive ring: one per queue - */ -struct rx_ring { - struct adapter *adapter; - u32 me; - struct igb_dma_alloc rxdma; - union e1000_adv_rx_desc *rx_base; - struct lro_ctrl lro; - bool lro_enabled; - bool hdr_split; - struct mtx rx_mtx; - char mtx_name[16]; - u32 next_to_refresh; - u32 next_to_check; - struct igb_rx_buf *rx_buffers; - bus_dma_tag_t htag; /* dma tag for rx head */ - bus_dma_tag_t ptag; /* dma tag for rx packet */ - /* - * First/last 
mbuf pointers, for - * collecting multisegment RX packets. - */ - struct mbuf *fmp; - struct mbuf *lmp; - - u32 bytes; - u32 packets; - int rdt; - int rdh; - - /* Soft stats */ - u64 rx_split_packets; - u64 rx_discarded; - u64 rx_packets; - u64 rx_bytes; -}; - -struct adapter { - struct ifnet *ifp; - struct e1000_hw hw; - - struct e1000_osdep osdep; - device_t dev; - struct cdev *led_dev; - - struct resource *pci_mem; - struct resource *msix_mem; - int memrid; - - /* - * Interrupt resources: this set is - * either used for legacy, or for Link - * when doing MSIX - */ - void *tag; - struct resource *res; - - struct ifmedia media; - struct callout timer; - int msix; - int if_flags; - int pause_frames; - - struct mtx core_mtx; - - eventhandler_tag vlan_attach; - eventhandler_tag vlan_detach; - - u16 num_vlans; - u16 num_queues; - - /* - ** Shadow VFTA table, this is needed because - ** the real vlan filter table gets cleared during - ** a soft reset and the driver needs to be able - ** to repopulate it. - */ - u32 shadow_vfta[IGB_VFTA_SIZE]; - - /* Info about the interface */ - u32 optics; - u32 fc; /* local flow ctrl setting */ - int advertise; /* link speeds */ - bool link_active; - u16 max_frame_size; - u16 num_segs; - u16 link_speed; - bool link_up; - u32 linkvec; - u16 link_duplex; - u32 dmac; - int link_mask; - - /* Flags */ - u32 flags; - - /* Mbuf cluster size */ - u32 rx_mbuf_sz; - - /* Support for pluggable optics */ - bool sfp_probe; - struct task link_task; /* Link tasklet */ - struct task mod_task; /* SFP tasklet */ - struct task msf_task; /* Multispeed Fiber */ - struct taskqueue *tq; - - /* - ** Queues: - ** This is the irq holder, it has - ** and RX/TX pair or rings associated - ** with it. - */ - struct igb_queue *queues; - - /* - * Transmit rings: - * Allocated at run time, an array of rings. - */ - struct tx_ring *tx_rings; - u32 num_tx_desc; - - /* - * Receive rings: - * Allocated at run time, an array of rings. 
- */ - struct rx_ring *rx_rings; - u64 que_mask; - u32 num_rx_desc; - - /* Multicast array memory */ - u8 *mta; - - /* Misc stats maintained by the driver */ - unsigned long device_control; - unsigned long dropped_pkts; - unsigned long eint_mask; - unsigned long int_mask; - unsigned long link_irq; - unsigned long mbuf_defrag_failed; - unsigned long no_tx_dma_setup; - unsigned long packet_buf_alloc_rx; - unsigned long packet_buf_alloc_tx; - unsigned long rx_control; - unsigned long rx_overruns; - unsigned long watchdog_events; - - /* Used in pf and vf */ - void *stats; - - int enable_aim; - int has_manage; - int wol; - int rx_process_limit; - int tx_process_limit; - u16 vf_ifp; /* a VF interface */ - bool in_detach; /* Used only in igb_ioctl */ - -}; - -/* ****************************************************************************** - * vendor_info_array - * - * This array contains the list of Subvendor/Subdevice IDs on which the driver - * should load. - * - * ******************************************************************************/ -typedef struct _igb_vendor_info_t { - unsigned int vendor_id; - unsigned int device_id; - unsigned int subvendor_id; - unsigned int subdevice_id; - unsigned int index; -} igb_vendor_info_t; - -struct igb_tx_buf { - union e1000_adv_tx_desc *eop; - struct mbuf *m_head; - bus_dmamap_t map; -}; - -struct igb_rx_buf { - struct mbuf *m_head; - struct mbuf *m_pack; - bus_dmamap_t hmap; /* bus_dma map for header */ - bus_dmamap_t pmap; /* bus_dma map for packet */ -}; - -/* -** Find the number of unrefreshed RX descriptors -*/ -static inline u16 -igb_rx_unrefreshed(struct rx_ring *rxr) -{ - struct adapter *adapter = rxr->adapter; - - if (rxr->next_to_check > rxr->next_to_refresh) - return (rxr->next_to_check - rxr->next_to_refresh - 1); - else - return ((adapter->num_rx_desc + rxr->next_to_check) - - rxr->next_to_refresh - 1); -} - -#define IGB_CORE_LOCK_INIT(_sc, _name) \ - mtx_init(&(_sc)->core_mtx, _name, "IGB Core Lock", MTX_DEF) 
-#define IGB_CORE_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->core_mtx) -#define IGB_CORE_LOCK(_sc) mtx_lock(&(_sc)->core_mtx) -#define IGB_CORE_UNLOCK(_sc) mtx_unlock(&(_sc)->core_mtx) -#define IGB_CORE_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->core_mtx, MA_OWNED) - -#define IGB_TX_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->tx_mtx) -#define IGB_TX_LOCK(_sc) mtx_lock(&(_sc)->tx_mtx) -#define IGB_TX_UNLOCK(_sc) mtx_unlock(&(_sc)->tx_mtx) -#define IGB_TX_TRYLOCK(_sc) mtx_trylock(&(_sc)->tx_mtx) -#define IGB_TX_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->tx_mtx, MA_OWNED) - -#define IGB_RX_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->rx_mtx) -#define IGB_RX_LOCK(_sc) mtx_lock(&(_sc)->rx_mtx) -#define IGB_RX_UNLOCK(_sc) mtx_unlock(&(_sc)->rx_mtx) -#define IGB_RX_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->rx_mtx, MA_OWNED) - -#define UPDATE_VF_REG(reg, last, cur) \ -{ \ - u32 new = E1000_READ_REG(hw, reg); \ - if (new < last) \ - cur += 0x100000000LL; \ - last = new; \ - cur &= 0xFFFFFFFF00000000LL; \ - cur |= new; \ -} - -#if __FreeBSD_version >= 800000 && __FreeBSD_version < 800504 -static __inline int -drbr_needs_enqueue(struct ifnet *ifp, struct buf_ring *br) -{ -#ifdef ALTQ - if (ALTQ_IS_ENABLED(&ifp->if_snd)) - return (1); -#endif - return (!buf_ring_empty(br)); -} -#endif - -#endif /* _IF_IGB_H_ */ - - diff --git a/freebsd/sys/dev/e1000/if_lem.c b/freebsd/sys/dev/e1000/if_lem.c deleted file mode 100644 index b3da1bdd..00000000 --- a/freebsd/sys/dev/e1000/if_lem.c +++ /dev/null @@ -1,4732 +0,0 @@ -#include - -/****************************************************************************** - - Copyright (c) 2001-2015, Intel Corporation - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - 2. 
Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - 3. Neither the name of the Intel Corporation nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE. - -******************************************************************************/ -/*$FreeBSD$*/ - -/* - * Uncomment the following extensions for better performance in a VM, - * especially if you have support in the hypervisor. 
- * See http://info.iet.unipi.it/~luigi/netmap/ - */ -// #define BATCH_DISPATCH -// #define NIC_SEND_COMBINING - -#include -#include - -#ifdef HAVE_KERNEL_OPTION_HEADERS -#include -#endif - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "e1000_api.h" -#include "if_lem.h" - -/********************************************************************* - * Legacy Em Driver version: - *********************************************************************/ -char lem_driver_version[] = "1.1.0"; - -/********************************************************************* - * PCI Device ID Table - * - * Used by probe to select devices to load on - * Last field stores an index into e1000_strings - * Last entry must be all 0s - * - * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index } - *********************************************************************/ - -static em_vendor_info_t lem_vendor_info_array[] = -{ - /* Intel(R) PRO/1000 Network Connection */ - { 0x8086, E1000_DEV_ID_82540EM, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82540EM_LOM, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82540EP, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82540EP_LOM, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82540EP_LP, PCI_ANY_ID, PCI_ANY_ID, 0}, - - { 0x8086, E1000_DEV_ID_82541EI, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82541ER, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82541ER_LOM, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82541EI_MOBILE, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82541GI, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82541GI_LF, 
PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82541GI_MOBILE, PCI_ANY_ID, PCI_ANY_ID, 0}, - - { 0x8086, E1000_DEV_ID_82542, PCI_ANY_ID, PCI_ANY_ID, 0}, - - { 0x8086, E1000_DEV_ID_82543GC_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82543GC_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, - - { 0x8086, E1000_DEV_ID_82544EI_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82544EI_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82544GC_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82544GC_LOM, PCI_ANY_ID, PCI_ANY_ID, 0}, - - { 0x8086, E1000_DEV_ID_82545EM_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82545EM_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82545GM_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82545GM_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82545GM_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0}, - - { 0x8086, E1000_DEV_ID_82546EB_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82546EB_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82546EB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82546GB_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82546GB_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82546GB_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82546GB_PCIE, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3, - PCI_ANY_ID, PCI_ANY_ID, 0}, - - { 0x8086, E1000_DEV_ID_82547EI, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82547EI_MOBILE, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82547GI, PCI_ANY_ID, PCI_ANY_ID, 0}, - /* required last entry */ - { 0, 0, 0, 0, 0} -}; - -/********************************************************************* - * Table of branding strings for all supported NICs. 
- *********************************************************************/ - -static char *lem_strings[] = { - "Intel(R) PRO/1000 Legacy Network Connection" -}; - -/********************************************************************* - * Function prototypes - *********************************************************************/ -static int lem_probe(device_t); -static int lem_attach(device_t); -static int lem_detach(device_t); -static int lem_shutdown(device_t); -static int lem_suspend(device_t); -static int lem_resume(device_t); -static void lem_start(if_t); -static void lem_start_locked(if_t ifp); -static int lem_ioctl(if_t, u_long, caddr_t); -static uint64_t lem_get_counter(if_t, ift_counter); -static void lem_init(void *); -static void lem_init_locked(struct adapter *); -static void lem_stop(void *); -static void lem_media_status(if_t, struct ifmediareq *); -static int lem_media_change(if_t); -static void lem_identify_hardware(struct adapter *); -static int lem_allocate_pci_resources(struct adapter *); -static int lem_allocate_irq(struct adapter *adapter); -static void lem_free_pci_resources(struct adapter *); -static void lem_local_timer(void *); -static int lem_hardware_init(struct adapter *); -static int lem_setup_interface(device_t, struct adapter *); -static void lem_setup_transmit_structures(struct adapter *); -static void lem_initialize_transmit_unit(struct adapter *); -static int lem_setup_receive_structures(struct adapter *); -static void lem_initialize_receive_unit(struct adapter *); -static void lem_enable_intr(struct adapter *); -static void lem_disable_intr(struct adapter *); -static void lem_free_transmit_structures(struct adapter *); -static void lem_free_receive_structures(struct adapter *); -static void lem_update_stats_counters(struct adapter *); -static void lem_add_hw_stats(struct adapter *adapter); -static void lem_txeof(struct adapter *); -static void lem_tx_purge(struct adapter *); -static int lem_allocate_receive_structures(struct 
adapter *); -static int lem_allocate_transmit_structures(struct adapter *); -static bool lem_rxeof(struct adapter *, int, int *); -#ifndef __NO_STRICT_ALIGNMENT -static int lem_fixup_rx(struct adapter *); -#endif -static void lem_receive_checksum(struct adapter *, struct e1000_rx_desc *, - struct mbuf *); -static void lem_transmit_checksum_setup(struct adapter *, struct mbuf *, - u32 *, u32 *); -static void lem_set_promisc(struct adapter *); -static void lem_disable_promisc(struct adapter *); -static void lem_set_multi(struct adapter *); -static void lem_update_link_status(struct adapter *); -static int lem_get_buf(struct adapter *, int); -static void lem_register_vlan(void *, if_t, u16); -static void lem_unregister_vlan(void *, if_t, u16); -static void lem_setup_vlan_hw_support(struct adapter *); -static int lem_xmit(struct adapter *, struct mbuf **); -static void lem_smartspeed(struct adapter *); -static int lem_82547_fifo_workaround(struct adapter *, int); -static void lem_82547_update_fifo_head(struct adapter *, int); -static int lem_82547_tx_fifo_reset(struct adapter *); -static void lem_82547_move_tail(void *); -static int lem_dma_malloc(struct adapter *, bus_size_t, - struct em_dma_alloc *, int); -static void lem_dma_free(struct adapter *, struct em_dma_alloc *); -static int lem_sysctl_nvm_info(SYSCTL_HANDLER_ARGS); -static void lem_print_nvm_info(struct adapter *); -static int lem_is_valid_ether_addr(u8 *); -static u32 lem_fill_descriptors (bus_addr_t address, u32 length, - PDESC_ARRAY desc_array); -static int lem_sysctl_int_delay(SYSCTL_HANDLER_ARGS); -static void lem_add_int_delay_sysctl(struct adapter *, const char *, - const char *, struct em_int_delay_info *, int, int); -static void lem_set_flow_cntrl(struct adapter *, const char *, - const char *, int *, int); -/* Management and WOL Support */ -static void lem_init_manageability(struct adapter *); -static void lem_release_manageability(struct adapter *); -static void lem_get_hw_control(struct adapter 
*); -static void lem_release_hw_control(struct adapter *); -static void lem_get_wakeup(device_t); -static void lem_enable_wakeup(device_t); -static int lem_enable_phy_wakeup(struct adapter *); -static void lem_led_func(void *, int); - -static void lem_intr(void *); -static int lem_irq_fast(void *); -static void lem_handle_rxtx(void *context, int pending); -static void lem_handle_link(void *context, int pending); -static void lem_add_rx_process_limit(struct adapter *, const char *, - const char *, int *, int); - -#ifdef DEVICE_POLLING -static poll_handler_t lem_poll; -#endif /* POLLING */ - -/********************************************************************* - * FreeBSD Device Interface Entry Points - *********************************************************************/ - -static device_method_t lem_methods[] = { - /* Device interface */ - DEVMETHOD(device_probe, lem_probe), - DEVMETHOD(device_attach, lem_attach), - DEVMETHOD(device_detach, lem_detach), - DEVMETHOD(device_shutdown, lem_shutdown), - DEVMETHOD(device_suspend, lem_suspend), - DEVMETHOD(device_resume, lem_resume), - DEVMETHOD_END -}; - -static driver_t lem_driver = { - "em", lem_methods, sizeof(struct adapter), -}; - -extern devclass_t em_devclass; -DRIVER_MODULE(lem, pci, lem_driver, em_devclass, 0, 0); -MODULE_DEPEND(lem, pci, 1, 1, 1); -MODULE_DEPEND(lem, ether, 1, 1, 1); -#ifdef DEV_NETMAP -MODULE_DEPEND(lem, netmap, 1, 1, 1); -#endif /* DEV_NETMAP */ - -/********************************************************************* - * Tunable default values. 
- *********************************************************************/ - -#define EM_TICKS_TO_USECS(ticks) ((1024 * (ticks) + 500) / 1000) -#define EM_USECS_TO_TICKS(usecs) ((1000 * (usecs) + 512) / 1024) - -#define MAX_INTS_PER_SEC 8000 -#define DEFAULT_ITR (1000000000/(MAX_INTS_PER_SEC * 256)) - -static int lem_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV); -static int lem_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR); -static int lem_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV); -static int lem_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV); -/* - * increase lem_rxd and lem_txd to at least 2048 in netmap mode - * for better performance. - */ -static int lem_rxd = EM_DEFAULT_RXD; -static int lem_txd = EM_DEFAULT_TXD; -static int lem_smart_pwr_down = FALSE; - -/* Controls whether promiscuous also shows bad packets */ -static int lem_debug_sbp = FALSE; - -TUNABLE_INT("hw.em.tx_int_delay", &lem_tx_int_delay_dflt); -TUNABLE_INT("hw.em.rx_int_delay", &lem_rx_int_delay_dflt); -TUNABLE_INT("hw.em.tx_abs_int_delay", &lem_tx_abs_int_delay_dflt); -TUNABLE_INT("hw.em.rx_abs_int_delay", &lem_rx_abs_int_delay_dflt); -TUNABLE_INT("hw.em.rxd", &lem_rxd); -TUNABLE_INT("hw.em.txd", &lem_txd); -TUNABLE_INT("hw.em.smart_pwr_down", &lem_smart_pwr_down); -TUNABLE_INT("hw.em.sbp", &lem_debug_sbp); - -/* Interrupt style - default to fast */ -static int lem_use_legacy_irq = 0; -TUNABLE_INT("hw.em.use_legacy_irq", &lem_use_legacy_irq); - -/* How many packets rxeof tries to clean at a time */ -static int lem_rx_process_limit = 100; -TUNABLE_INT("hw.em.rx_process_limit", &lem_rx_process_limit); - -/* Flow control setting - default to FULL */ -static int lem_fc_setting = e1000_fc_full; -TUNABLE_INT("hw.em.fc_setting", &lem_fc_setting); - -/* Global used in WOL setup with multiport cards */ -static int global_quad_port_a = 0; - -#ifdef DEV_NETMAP /* see ixgbe.c for details */ -#include -#endif /* DEV_NETMAP */ - 
-/********************************************************************* - * Device identification routine - * - * em_probe determines if the driver should be loaded on - * adapter based on PCI vendor/device id of the adapter. - * - * return BUS_PROBE_DEFAULT on success, positive on failure - *********************************************************************/ - -static int -lem_probe(device_t dev) -{ - char adapter_name[60]; - u16 pci_vendor_id = 0; - u16 pci_device_id = 0; - u16 pci_subvendor_id = 0; - u16 pci_subdevice_id = 0; - em_vendor_info_t *ent; - - INIT_DEBUGOUT("em_probe: begin"); - - pci_vendor_id = pci_get_vendor(dev); - if (pci_vendor_id != EM_VENDOR_ID) - return (ENXIO); - - pci_device_id = pci_get_device(dev); - pci_subvendor_id = pci_get_subvendor(dev); - pci_subdevice_id = pci_get_subdevice(dev); - - ent = lem_vendor_info_array; - while (ent->vendor_id != 0) { - if ((pci_vendor_id == ent->vendor_id) && - (pci_device_id == ent->device_id) && - - ((pci_subvendor_id == ent->subvendor_id) || - (ent->subvendor_id == PCI_ANY_ID)) && - - ((pci_subdevice_id == ent->subdevice_id) || - (ent->subdevice_id == PCI_ANY_ID))) { - sprintf(adapter_name, "%s %s", - lem_strings[ent->index], - lem_driver_version); - device_set_desc_copy(dev, adapter_name); - return (BUS_PROBE_DEFAULT); - } - ent++; - } - - return (ENXIO); -} - -/********************************************************************* - * Device initialization routine - * - * The attach entry point is called when the driver is being loaded. - * This routine identifies the type of hardware, allocates all resources - * and initializes the hardware. 
- * - * return 0 on success, positive on failure - *********************************************************************/ - -static int -lem_attach(device_t dev) -{ - struct adapter *adapter; - int tsize, rsize; - int error = 0; - - INIT_DEBUGOUT("lem_attach: begin"); - - adapter = device_get_softc(dev); - adapter->dev = adapter->osdep.dev = dev; - EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev)); - EM_TX_LOCK_INIT(adapter, device_get_nameunit(dev)); - EM_RX_LOCK_INIT(adapter, device_get_nameunit(dev)); - - /* SYSCTL stuff */ - SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), - SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), - OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, - lem_sysctl_nvm_info, "I", "NVM Information"); - - callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0); - callout_init_mtx(&adapter->tx_fifo_timer, &adapter->tx_mtx, 0); - - /* Determine hardware and mac info */ - lem_identify_hardware(adapter); - - /* Setup PCI resources */ - if (lem_allocate_pci_resources(adapter)) { - device_printf(dev, "Allocation of PCI resources failed\n"); - error = ENXIO; - goto err_pci; - } - - /* Do Shared Code initialization */ - if (e1000_setup_init_funcs(&adapter->hw, TRUE)) { - device_printf(dev, "Setup of Shared code failed\n"); - error = ENXIO; - goto err_pci; - } - - e1000_get_bus_info(&adapter->hw); - - /* Set up some sysctls for the tunable interrupt delays */ - lem_add_int_delay_sysctl(adapter, "rx_int_delay", - "receive interrupt delay in usecs", &adapter->rx_int_delay, - E1000_REGISTER(&adapter->hw, E1000_RDTR), lem_rx_int_delay_dflt); - lem_add_int_delay_sysctl(adapter, "tx_int_delay", - "transmit interrupt delay in usecs", &adapter->tx_int_delay, - E1000_REGISTER(&adapter->hw, E1000_TIDV), lem_tx_int_delay_dflt); - if (adapter->hw.mac.type >= e1000_82540) { - lem_add_int_delay_sysctl(adapter, "rx_abs_int_delay", - "receive interrupt delay limit in usecs", - &adapter->rx_abs_int_delay, - E1000_REGISTER(&adapter->hw, E1000_RADV), - 
lem_rx_abs_int_delay_dflt); - lem_add_int_delay_sysctl(adapter, "tx_abs_int_delay", - "transmit interrupt delay limit in usecs", - &adapter->tx_abs_int_delay, - E1000_REGISTER(&adapter->hw, E1000_TADV), - lem_tx_abs_int_delay_dflt); - lem_add_int_delay_sysctl(adapter, "itr", - "interrupt delay limit in usecs/4", - &adapter->tx_itr, - E1000_REGISTER(&adapter->hw, E1000_ITR), - DEFAULT_ITR); - } - - /* Sysctls for limiting the amount of work done in the taskqueue */ - lem_add_rx_process_limit(adapter, "rx_processing_limit", - "max number of rx packets to process", &adapter->rx_process_limit, - lem_rx_process_limit); - -#ifdef NIC_SEND_COMBINING - /* Sysctls to control mitigation */ - lem_add_rx_process_limit(adapter, "sc_enable", - "driver TDT mitigation", &adapter->sc_enable, 0); -#endif /* NIC_SEND_COMBINING */ -#ifdef BATCH_DISPATCH - lem_add_rx_process_limit(adapter, "batch_enable", - "driver rx batch", &adapter->batch_enable, 0); -#endif /* BATCH_DISPATCH */ - - /* Sysctl for setting the interface flow control */ - lem_set_flow_cntrl(adapter, "flow_control", - "flow control setting", - &adapter->fc_setting, lem_fc_setting); - - /* - * Validate number of transmit and receive descriptors. It - * must not exceed hardware maximum, and must be multiple - * of E1000_DBA_ALIGN. 
- */ - if (((lem_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 || - (adapter->hw.mac.type >= e1000_82544 && lem_txd > EM_MAX_TXD) || - (adapter->hw.mac.type < e1000_82544 && lem_txd > EM_MAX_TXD_82543) || - (lem_txd < EM_MIN_TXD)) { - device_printf(dev, "Using %d TX descriptors instead of %d!\n", - EM_DEFAULT_TXD, lem_txd); - adapter->num_tx_desc = EM_DEFAULT_TXD; - } else - adapter->num_tx_desc = lem_txd; - if (((lem_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 || - (adapter->hw.mac.type >= e1000_82544 && lem_rxd > EM_MAX_RXD) || - (adapter->hw.mac.type < e1000_82544 && lem_rxd > EM_MAX_RXD_82543) || - (lem_rxd < EM_MIN_RXD)) { - device_printf(dev, "Using %d RX descriptors instead of %d!\n", - EM_DEFAULT_RXD, lem_rxd); - adapter->num_rx_desc = EM_DEFAULT_RXD; - } else - adapter->num_rx_desc = lem_rxd; - - adapter->hw.mac.autoneg = DO_AUTO_NEG; - adapter->hw.phy.autoneg_wait_to_complete = FALSE; - adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT; - adapter->rx_buffer_len = 2048; - - e1000_init_script_state_82541(&adapter->hw, TRUE); - e1000_set_tbi_compatibility_82543(&adapter->hw, TRUE); - - /* Copper options */ - if (adapter->hw.phy.media_type == e1000_media_type_copper) { - adapter->hw.phy.mdix = AUTO_ALL_MODES; - adapter->hw.phy.disable_polarity_correction = FALSE; - adapter->hw.phy.ms_type = EM_MASTER_SLAVE; - } - - /* - * Set the frame limits assuming - * standard ethernet sized frames. - */ - adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE; - adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE; - - /* - * This controls when hardware reports transmit completion - * status. - */ - adapter->hw.mac.report_tx_early = 1; - - /* - * It seems that the descriptor DMA engine on some PCI cards - * fetches memory past the end of the last descriptor in the - * ring. These reads are problematic when VT-d (DMAR) busdma - * is used. 
Allocate the scratch space to avoid getting - * faults from DMAR, by requesting scratch memory for one more - * descriptor. - */ - tsize = roundup2((adapter->num_tx_desc + 1) * - sizeof(struct e1000_tx_desc), EM_DBA_ALIGN); - - /* Allocate Transmit Descriptor ring */ - if (lem_dma_malloc(adapter, tsize, &adapter->txdma, BUS_DMA_NOWAIT)) { - device_printf(dev, "Unable to allocate tx_desc memory\n"); - error = ENOMEM; - goto err_tx_desc; - } - adapter->tx_desc_base = - (struct e1000_tx_desc *)adapter->txdma.dma_vaddr; - - /* - * See comment above txdma allocation for rationale behind +1. - */ - rsize = roundup2((adapter->num_rx_desc + 1) * - sizeof(struct e1000_rx_desc), EM_DBA_ALIGN); - - /* Allocate Receive Descriptor ring */ - if (lem_dma_malloc(adapter, rsize, &adapter->rxdma, BUS_DMA_NOWAIT)) { - device_printf(dev, "Unable to allocate rx_desc memory\n"); - error = ENOMEM; - goto err_rx_desc; - } - adapter->rx_desc_base = - (struct e1000_rx_desc *)adapter->rxdma.dma_vaddr; - - /* Allocate multicast array memory. */ - adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN * - MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT); - if (adapter->mta == NULL) { - device_printf(dev, "Can not allocate multicast setup array\n"); - error = ENOMEM; - goto err_hw_init; - } - - /* - ** Start from a known state, this is - ** important in reading the nvm and - ** mac from that. - */ - e1000_reset_hw(&adapter->hw); - - /* Make sure we have a good EEPROM before we read from it */ - if (e1000_validate_nvm_checksum(&adapter->hw) < 0) { - /* - ** Some PCI-E parts fail the first check due to - ** the link being in sleep state, call it again, - ** if it fails a second time its a real issue. 
- */ - if (e1000_validate_nvm_checksum(&adapter->hw) < 0) { - device_printf(dev, - "The EEPROM Checksum Is Not Valid\n"); - error = EIO; - goto err_hw_init; - } - } - - /* Copy the permanent MAC address out of the EEPROM */ - if (e1000_read_mac_addr(&adapter->hw) < 0) { - device_printf(dev, "EEPROM read error while reading MAC" - " address\n"); - error = EIO; - goto err_hw_init; - } - - if (!lem_is_valid_ether_addr(adapter->hw.mac.addr)) { - device_printf(dev, "Invalid MAC address\n"); - error = EIO; - goto err_hw_init; - } - - /* Initialize the hardware */ - if (lem_hardware_init(adapter)) { - device_printf(dev, "Unable to initialize the hardware\n"); - error = EIO; - goto err_hw_init; - } - - /* Allocate transmit descriptors and buffers */ - if (lem_allocate_transmit_structures(adapter)) { - device_printf(dev, "Could not setup transmit structures\n"); - error = ENOMEM; - goto err_tx_struct; - } - - /* Allocate receive descriptors and buffers */ - if (lem_allocate_receive_structures(adapter)) { - device_printf(dev, "Could not setup receive structures\n"); - error = ENOMEM; - goto err_rx_struct; - } - - /* - ** Do interrupt configuration - */ - error = lem_allocate_irq(adapter); - if (error) - goto err_rx_struct; - - /* - * Get Wake-on-Lan and Management info for later use - */ - lem_get_wakeup(dev); - - /* Setup OS specific network interface */ - if (lem_setup_interface(dev, adapter) != 0) - goto err_rx_struct; - - /* Initialize statistics */ - lem_update_stats_counters(adapter); - - adapter->hw.mac.get_link_status = 1; - lem_update_link_status(adapter); - - /* Indicate SOL/IDER usage */ - if (e1000_check_reset_block(&adapter->hw)) - device_printf(dev, - "PHY reset is blocked due to SOL/IDER session.\n"); - - /* Do we need workaround for 82544 PCI-X adapter? 
*/ - if (adapter->hw.bus.type == e1000_bus_type_pcix && - adapter->hw.mac.type == e1000_82544) - adapter->pcix_82544 = TRUE; - else - adapter->pcix_82544 = FALSE; - - /* Register for VLAN events */ - adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, - lem_register_vlan, adapter, EVENTHANDLER_PRI_FIRST); - adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, - lem_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST); - - lem_add_hw_stats(adapter); - - /* Non-AMT based hardware can now take control from firmware */ - if (adapter->has_manage && !adapter->has_amt) - lem_get_hw_control(adapter); - - /* Tell the stack that the interface is not active */ - if_setdrvflagbits(adapter->ifp, 0, IFF_DRV_OACTIVE | IFF_DRV_RUNNING); - - adapter->led_dev = led_create(lem_led_func, adapter, - device_get_nameunit(dev)); - -#ifdef DEV_NETMAP - lem_netmap_attach(adapter); -#endif /* DEV_NETMAP */ - INIT_DEBUGOUT("lem_attach: end"); - - return (0); - -err_rx_struct: - lem_free_transmit_structures(adapter); -err_tx_struct: -err_hw_init: - lem_release_hw_control(adapter); - lem_dma_free(adapter, &adapter->rxdma); -err_rx_desc: - lem_dma_free(adapter, &adapter->txdma); -err_tx_desc: -err_pci: - if (adapter->ifp != (void *)NULL) - if_free(adapter->ifp); - lem_free_pci_resources(adapter); - free(adapter->mta, M_DEVBUF); - EM_TX_LOCK_DESTROY(adapter); - EM_RX_LOCK_DESTROY(adapter); - EM_CORE_LOCK_DESTROY(adapter); - - return (error); -} - -/********************************************************************* - * Device removal routine - * - * The detach entry point is called when the driver is being removed. - * This routine stops the adapter and deallocates all the resources - * that were allocated for driver operation. 
- * - * return 0 on success, positive on failure - *********************************************************************/ - -static int -lem_detach(device_t dev) -{ - struct adapter *adapter = device_get_softc(dev); - if_t ifp = adapter->ifp; - - INIT_DEBUGOUT("em_detach: begin"); - - /* Make sure VLANS are not using driver */ - if (if_vlantrunkinuse(ifp)) { - device_printf(dev,"Vlan in use, detach first\n"); - return (EBUSY); - } - -#ifdef DEVICE_POLLING - if (if_getcapenable(ifp) & IFCAP_POLLING) - ether_poll_deregister(ifp); -#endif - - if (adapter->led_dev != NULL) - led_destroy(adapter->led_dev); - - EM_CORE_LOCK(adapter); - EM_TX_LOCK(adapter); - adapter->in_detach = 1; - lem_stop(adapter); - e1000_phy_hw_reset(&adapter->hw); - - lem_release_manageability(adapter); - - EM_TX_UNLOCK(adapter); - EM_CORE_UNLOCK(adapter); - - /* Unregister VLAN events */ - if (adapter->vlan_attach != NULL) - EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach); - if (adapter->vlan_detach != NULL) - EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach); - - ether_ifdetach(adapter->ifp); - callout_drain(&adapter->timer); - callout_drain(&adapter->tx_fifo_timer); - -#ifdef DEV_NETMAP - netmap_detach(ifp); -#endif /* DEV_NETMAP */ - lem_free_pci_resources(adapter); - bus_generic_detach(dev); - if_free(ifp); - - lem_free_transmit_structures(adapter); - lem_free_receive_structures(adapter); - - /* Free Transmit Descriptor ring */ - if (adapter->tx_desc_base) { - lem_dma_free(adapter, &adapter->txdma); - adapter->tx_desc_base = NULL; - } - - /* Free Receive Descriptor ring */ - if (adapter->rx_desc_base) { - lem_dma_free(adapter, &adapter->rxdma); - adapter->rx_desc_base = NULL; - } - - lem_release_hw_control(adapter); - free(adapter->mta, M_DEVBUF); - EM_TX_LOCK_DESTROY(adapter); - EM_RX_LOCK_DESTROY(adapter); - EM_CORE_LOCK_DESTROY(adapter); - - return (0); -} - -/********************************************************************* - * - * Shutdown entry point - * - 
**********************************************************************/ - -static int -lem_shutdown(device_t dev) -{ - return lem_suspend(dev); -} - -/* - * Suspend/resume device methods. - */ -static int -lem_suspend(device_t dev) -{ - struct adapter *adapter = device_get_softc(dev); - - EM_CORE_LOCK(adapter); - - lem_release_manageability(adapter); - lem_release_hw_control(adapter); - lem_enable_wakeup(dev); - - EM_CORE_UNLOCK(adapter); - - return bus_generic_suspend(dev); -} - -static int -lem_resume(device_t dev) -{ - struct adapter *adapter = device_get_softc(dev); - if_t ifp = adapter->ifp; - - EM_CORE_LOCK(adapter); - lem_init_locked(adapter); - lem_init_manageability(adapter); - EM_CORE_UNLOCK(adapter); - lem_start(ifp); - - return bus_generic_resume(dev); -} - - -static void -lem_start_locked(if_t ifp) -{ - struct adapter *adapter = if_getsoftc(ifp); - struct mbuf *m_head; - - EM_TX_LOCK_ASSERT(adapter); - - if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != - IFF_DRV_RUNNING) - return; - if (!adapter->link_active) - return; - - /* - * Force a cleanup if number of TX descriptors - * available hits the threshold - */ - if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) { - lem_txeof(adapter); - /* Now do we at least have a minimal? */ - if (adapter->num_tx_desc_avail <= EM_TX_OP_THRESHOLD) { - adapter->no_tx_desc_avail1++; - return; - } - } - - while (!if_sendq_empty(ifp)) { - m_head = if_dequeue(ifp); - - if (m_head == NULL) - break; - /* - * Encapsulation can modify our pointer, and or make it - * NULL on failure. In that event, we can't requeue. - */ - if (lem_xmit(adapter, &m_head)) { - if (m_head == NULL) - break; - if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0); - if_sendq_prepend(ifp, m_head); - break; - } - - /* Send a copy of the frame to the BPF listener */ - if_etherbpfmtap(ifp, m_head); - - /* Set timeout in case hardware has problems transmitting. 
*/ - adapter->watchdog_check = TRUE; - adapter->watchdog_time = ticks; - } - if (adapter->num_tx_desc_avail <= EM_TX_OP_THRESHOLD) - if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0); - - return; -} - -static void -lem_start(if_t ifp) -{ - struct adapter *adapter = if_getsoftc(ifp); - - EM_TX_LOCK(adapter); - if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) - lem_start_locked(ifp); - EM_TX_UNLOCK(adapter); -} - -/********************************************************************* - * Ioctl entry point - * - * em_ioctl is called when the user wants to configure the - * interface. - * - * return 0 on success, positive on failure - **********************************************************************/ - -static int -lem_ioctl(if_t ifp, u_long command, caddr_t data) -{ - struct adapter *adapter = if_getsoftc(ifp); - struct ifreq *ifr = (struct ifreq *)data; -#if defined(INET) || defined(INET6) - struct ifaddr *ifa = (struct ifaddr *)data; -#endif - bool avoid_reset = FALSE; - int error = 0; - - if (adapter->in_detach) - return (error); - - switch (command) { - case SIOCSIFADDR: -#ifdef INET - if (ifa->ifa_addr->sa_family == AF_INET) - avoid_reset = TRUE; -#endif -#ifdef INET6 - if (ifa->ifa_addr->sa_family == AF_INET6) - avoid_reset = TRUE; -#endif - /* - ** Calling init results in link renegotiation, - ** so we avoid doing it when possible. 
- */ - if (avoid_reset) { - if_setflagbits(ifp, IFF_UP, 0); - if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING)) - lem_init(adapter); -#ifdef INET - if (!(if_getflags(ifp) & IFF_NOARP)) - arp_ifinit(ifp, ifa); -#endif - } else - error = ether_ioctl(ifp, command, data); - break; - case SIOCSIFMTU: - { - int max_frame_size; - - IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)"); - - EM_CORE_LOCK(adapter); - switch (adapter->hw.mac.type) { - case e1000_82542: - max_frame_size = ETHER_MAX_LEN; - break; - default: - max_frame_size = MAX_JUMBO_FRAME_SIZE; - } - if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN - - ETHER_CRC_LEN) { - EM_CORE_UNLOCK(adapter); - error = EINVAL; - break; - } - - if_setmtu(ifp, ifr->ifr_mtu); - adapter->max_frame_size = - if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN; - if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) - lem_init_locked(adapter); - EM_CORE_UNLOCK(adapter); - break; - } - case SIOCSIFFLAGS: - IOCTL_DEBUGOUT("ioctl rcv'd:\ - SIOCSIFFLAGS (Set Interface Flags)"); - EM_CORE_LOCK(adapter); - if (if_getflags(ifp) & IFF_UP) { - if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING)) { - if ((if_getflags(ifp) ^ adapter->if_flags) & - (IFF_PROMISC | IFF_ALLMULTI)) { - lem_disable_promisc(adapter); - lem_set_promisc(adapter); - } - } else - lem_init_locked(adapter); - } else - if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { - EM_TX_LOCK(adapter); - lem_stop(adapter); - EM_TX_UNLOCK(adapter); - } - adapter->if_flags = if_getflags(ifp); - EM_CORE_UNLOCK(adapter); - break; - case SIOCADDMULTI: - case SIOCDELMULTI: - IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI"); - if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { - EM_CORE_LOCK(adapter); - lem_disable_intr(adapter); - lem_set_multi(adapter); - if (adapter->hw.mac.type == e1000_82542 && - adapter->hw.revision_id == E1000_REVISION_2) { - lem_initialize_receive_unit(adapter); - } -#ifdef DEVICE_POLLING - if (!(if_getcapenable(ifp) & IFCAP_POLLING)) -#endif - lem_enable_intr(adapter); - 
EM_CORE_UNLOCK(adapter); - } - break; - case SIOCSIFMEDIA: - /* Check SOL/IDER usage */ - EM_CORE_LOCK(adapter); - if (e1000_check_reset_block(&adapter->hw)) { - EM_CORE_UNLOCK(adapter); - device_printf(adapter->dev, "Media change is" - " blocked due to SOL/IDER session.\n"); - break; - } - EM_CORE_UNLOCK(adapter); - case SIOCGIFMEDIA: - IOCTL_DEBUGOUT("ioctl rcv'd: \ - SIOCxIFMEDIA (Get/Set Interface Media)"); - error = ifmedia_ioctl(ifp, ifr, &adapter->media, command); - break; - case SIOCSIFCAP: - { - int mask, reinit; - - IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)"); - reinit = 0; - mask = ifr->ifr_reqcap ^ if_getcapenable(ifp); -#ifdef DEVICE_POLLING - if (mask & IFCAP_POLLING) { - if (ifr->ifr_reqcap & IFCAP_POLLING) { - error = ether_poll_register(lem_poll, ifp); - if (error) - return (error); - EM_CORE_LOCK(adapter); - lem_disable_intr(adapter); - if_setcapenablebit(ifp, IFCAP_POLLING, 0); - EM_CORE_UNLOCK(adapter); - } else { - error = ether_poll_deregister(ifp); - /* Enable interrupt even in error case */ - EM_CORE_LOCK(adapter); - lem_enable_intr(adapter); - if_setcapenablebit(ifp, 0, IFCAP_POLLING); - EM_CORE_UNLOCK(adapter); - } - } -#endif - if (mask & IFCAP_HWCSUM) { - if_togglecapenable(ifp, IFCAP_HWCSUM); - reinit = 1; - } - if (mask & IFCAP_VLAN_HWTAGGING) { - if_togglecapenable(ifp, IFCAP_VLAN_HWTAGGING); - reinit = 1; - } - if ((mask & IFCAP_WOL) && - (if_getcapabilities(ifp) & IFCAP_WOL) != 0) { - if (mask & IFCAP_WOL_MCAST) - if_togglecapenable(ifp, IFCAP_WOL_MCAST); - if (mask & IFCAP_WOL_MAGIC) - if_togglecapenable(ifp, IFCAP_WOL_MAGIC); - } - if (reinit && (if_getdrvflags(ifp) & IFF_DRV_RUNNING)) - lem_init(adapter); - if_vlancap(ifp); - break; - } - - default: - error = ether_ioctl(ifp, command, data); - break; - } - - return (error); -} - - -/********************************************************************* - * Init entry point - * - * This routine is used in two ways. 
It is used by the stack as - * init entry point in network interface structure. It is also used - * by the driver as a hw/sw initialization routine to get to a - * consistent state. - * - * return 0 on success, positive on failure - **********************************************************************/ - -static void -lem_init_locked(struct adapter *adapter) -{ - if_t ifp = adapter->ifp; - device_t dev = adapter->dev; - u32 pba; - - INIT_DEBUGOUT("lem_init: begin"); - - EM_CORE_LOCK_ASSERT(adapter); - - EM_TX_LOCK(adapter); - lem_stop(adapter); - EM_TX_UNLOCK(adapter); - - /* - * Packet Buffer Allocation (PBA) - * Writing PBA sets the receive portion of the buffer - * the remainder is used for the transmit buffer. - * - * Devices before the 82547 had a Packet Buffer of 64K. - * Default allocation: PBA=48K for Rx, leaving 16K for Tx. - * After the 82547 the buffer was reduced to 40K. - * Default allocation: PBA=30K for Rx, leaving 10K for Tx. - * Note: default does not leave enough room for Jumbo Frame >10k. - */ - switch (adapter->hw.mac.type) { - case e1000_82547: - case e1000_82547_rev_2: /* 82547: Total Packet Buffer is 40K */ - if (adapter->max_frame_size > 8192) - pba = E1000_PBA_22K; /* 22K for Rx, 18K for Tx */ - else - pba = E1000_PBA_30K; /* 30K for Rx, 10K for Tx */ - adapter->tx_fifo_head = 0; - adapter->tx_head_addr = pba << EM_TX_HEAD_ADDR_SHIFT; - adapter->tx_fifo_size = - (E1000_PBA_40K - pba) << EM_PBA_BYTES_SHIFT; - break; - default: - /* Devices before 82547 had a Packet Buffer of 64K. 
*/ - if (adapter->max_frame_size > 8192) - pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */ - else - pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */ - } - - INIT_DEBUGOUT1("lem_init: pba=%dK",pba); - E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba); - - /* Get the latest mac address, User can use a LAA */ - bcopy(if_getlladdr(adapter->ifp), adapter->hw.mac.addr, - ETHER_ADDR_LEN); - - /* Put the address into the Receive Address Array */ - e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0); - - /* Initialize the hardware */ - if (lem_hardware_init(adapter)) { - device_printf(dev, "Unable to initialize the hardware\n"); - return; - } - lem_update_link_status(adapter); - - /* Setup VLAN support, basic and offload if available */ - E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN); - - /* Set hardware offload abilities */ - if_clearhwassist(ifp); - if (adapter->hw.mac.type >= e1000_82543) { - if (if_getcapenable(ifp) & IFCAP_TXCSUM) - if_sethwassistbits(ifp, CSUM_TCP | CSUM_UDP, 0); - } - - /* Configure for OS presence */ - lem_init_manageability(adapter); - - /* Prepare transmit descriptors and buffers */ - lem_setup_transmit_structures(adapter); - lem_initialize_transmit_unit(adapter); - - /* Setup Multicast table */ - lem_set_multi(adapter); - - /* Prepare receive descriptors and buffers */ - if (lem_setup_receive_structures(adapter)) { - device_printf(dev, "Could not setup receive structures\n"); - EM_TX_LOCK(adapter); - lem_stop(adapter); - EM_TX_UNLOCK(adapter); - return; - } - lem_initialize_receive_unit(adapter); - - /* Use real VLAN Filter support? 
*/ - if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) { - if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) - /* Use real VLAN Filter support */ - lem_setup_vlan_hw_support(adapter); - else { - u32 ctrl; - ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL); - ctrl |= E1000_CTRL_VME; - E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl); - } - } - - /* Don't lose promiscuous settings */ - lem_set_promisc(adapter); - - if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE); - - callout_reset(&adapter->timer, hz, lem_local_timer, adapter); - e1000_clear_hw_cntrs_base_generic(&adapter->hw); - -#ifdef DEVICE_POLLING - /* - * Only enable interrupts if we are not polling, make sure - * they are off otherwise. - */ - if (if_getcapenable(ifp) & IFCAP_POLLING) - lem_disable_intr(adapter); - else -#endif /* DEVICE_POLLING */ - lem_enable_intr(adapter); - - /* AMT based hardware can now take control from firmware */ - if (adapter->has_manage && adapter->has_amt) - lem_get_hw_control(adapter); -} - -static void -lem_init(void *arg) -{ - struct adapter *adapter = arg; - - EM_CORE_LOCK(adapter); - lem_init_locked(adapter); - EM_CORE_UNLOCK(adapter); -} - - -#ifdef DEVICE_POLLING -/********************************************************************* - * - * Legacy polling routine - * - *********************************************************************/ -static int -lem_poll(if_t ifp, enum poll_cmd cmd, int count) -{ - struct adapter *adapter = if_getsoftc(ifp); - u32 reg_icr, rx_done = 0; - - EM_CORE_LOCK(adapter); - if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) { - EM_CORE_UNLOCK(adapter); - return (rx_done); - } - - if (cmd == POLL_AND_CHECK_STATUS) { - reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); - if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { - callout_stop(&adapter->timer); - adapter->hw.mac.get_link_status = 1; - lem_update_link_status(adapter); - callout_reset(&adapter->timer, hz, - lem_local_timer, adapter); - } - } - EM_CORE_UNLOCK(adapter); - - 
lem_rxeof(adapter, count, &rx_done); - - EM_TX_LOCK(adapter); - lem_txeof(adapter); - if(!if_sendq_empty(ifp)) - lem_start_locked(ifp); - EM_TX_UNLOCK(adapter); - return (rx_done); -} -#endif /* DEVICE_POLLING */ - -/********************************************************************* - * - * Legacy Interrupt Service routine - * - *********************************************************************/ -static void -lem_intr(void *arg) -{ - struct adapter *adapter = arg; - if_t ifp = adapter->ifp; - u32 reg_icr; - - - if ((if_getcapenable(ifp) & IFCAP_POLLING) || - ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)) - return; - - EM_CORE_LOCK(adapter); - reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); - if (reg_icr & E1000_ICR_RXO) - adapter->rx_overruns++; - - if ((reg_icr == 0xffffffff) || (reg_icr == 0)) { - EM_CORE_UNLOCK(adapter); - return; - } - - if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { - callout_stop(&adapter->timer); - adapter->hw.mac.get_link_status = 1; - lem_update_link_status(adapter); - /* Deal with TX cruft when link lost */ - lem_tx_purge(adapter); - callout_reset(&adapter->timer, hz, - lem_local_timer, adapter); - EM_CORE_UNLOCK(adapter); - return; - } - - EM_CORE_UNLOCK(adapter); - lem_rxeof(adapter, -1, NULL); - - EM_TX_LOCK(adapter); - lem_txeof(adapter); - if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) && - (!if_sendq_empty(ifp))) - lem_start_locked(ifp); - EM_TX_UNLOCK(adapter); - return; -} - - -static void -lem_handle_link(void *context, int pending) -{ - struct adapter *adapter = context; - if_t ifp = adapter->ifp; - - if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING)) - return; - - EM_CORE_LOCK(adapter); - callout_stop(&adapter->timer); - lem_update_link_status(adapter); - /* Deal with TX cruft when link lost */ - lem_tx_purge(adapter); - callout_reset(&adapter->timer, hz, lem_local_timer, adapter); - EM_CORE_UNLOCK(adapter); -} - - -/* Combined RX/TX handler, used by Legacy and MSI */ -static void -lem_handle_rxtx(void *context, int 
pending) -{ - struct adapter *adapter = context; - if_t ifp = adapter->ifp; - - - if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { - bool more = lem_rxeof(adapter, adapter->rx_process_limit, NULL); - EM_TX_LOCK(adapter); - lem_txeof(adapter); - if(!if_sendq_empty(ifp)) - lem_start_locked(ifp); - EM_TX_UNLOCK(adapter); - if (more) { - taskqueue_enqueue(adapter->tq, &adapter->rxtx_task); - return; - } - } - - if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) - lem_enable_intr(adapter); -} - -/********************************************************************* - * - * Fast Legacy/MSI Combined Interrupt Service routine - * - *********************************************************************/ -static int -lem_irq_fast(void *arg) -{ - struct adapter *adapter = arg; - if_t ifp; - u32 reg_icr; - - ifp = adapter->ifp; - - reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); - - /* Hot eject? */ - if (reg_icr == 0xffffffff) - return FILTER_STRAY; - - /* Definitely not our interrupt. */ - if (reg_icr == 0x0) - return FILTER_STRAY; - - /* - * Mask interrupts until the taskqueue is finished running. This is - * cheap, just assume that it is needed. This also works around the - * MSI message reordering errata on certain systems. - */ - lem_disable_intr(adapter); - taskqueue_enqueue(adapter->tq, &adapter->rxtx_task); - - /* Link status change */ - if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { - adapter->hw.mac.get_link_status = 1; - taskqueue_enqueue(taskqueue_fast, &adapter->link_task); - } - - if (reg_icr & E1000_ICR_RXO) - adapter->rx_overruns++; - return FILTER_HANDLED; -} - - -/********************************************************************* - * - * Media Ioctl callback - * - * This routine is called whenever the user queries the status of - * the interface using ifconfig. 
- * - **********************************************************************/ -static void -lem_media_status(if_t ifp, struct ifmediareq *ifmr) -{ - struct adapter *adapter = if_getsoftc(ifp); - u_char fiber_type = IFM_1000_SX; - - INIT_DEBUGOUT("lem_media_status: begin"); - - EM_CORE_LOCK(adapter); - lem_update_link_status(adapter); - - ifmr->ifm_status = IFM_AVALID; - ifmr->ifm_active = IFM_ETHER; - - if (!adapter->link_active) { - EM_CORE_UNLOCK(adapter); - return; - } - - ifmr->ifm_status |= IFM_ACTIVE; - - if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || - (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) { - if (adapter->hw.mac.type == e1000_82545) - fiber_type = IFM_1000_LX; - ifmr->ifm_active |= fiber_type | IFM_FDX; - } else { - switch (adapter->link_speed) { - case 10: - ifmr->ifm_active |= IFM_10_T; - break; - case 100: - ifmr->ifm_active |= IFM_100_TX; - break; - case 1000: - ifmr->ifm_active |= IFM_1000_T; - break; - } - if (adapter->link_duplex == FULL_DUPLEX) - ifmr->ifm_active |= IFM_FDX; - else - ifmr->ifm_active |= IFM_HDX; - } - EM_CORE_UNLOCK(adapter); -} - -/********************************************************************* - * - * Media Ioctl callback - * - * This routine is called when the user changes speed/duplex using - * media/mediopt option with ifconfig. 
- * - **********************************************************************/ -static int -lem_media_change(if_t ifp) -{ - struct adapter *adapter = if_getsoftc(ifp); - struct ifmedia *ifm = &adapter->media; - - INIT_DEBUGOUT("lem_media_change: begin"); - - if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) - return (EINVAL); - - EM_CORE_LOCK(adapter); - switch (IFM_SUBTYPE(ifm->ifm_media)) { - case IFM_AUTO: - adapter->hw.mac.autoneg = DO_AUTO_NEG; - adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT; - break; - case IFM_1000_LX: - case IFM_1000_SX: - case IFM_1000_T: - adapter->hw.mac.autoneg = DO_AUTO_NEG; - adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL; - break; - case IFM_100_TX: - adapter->hw.mac.autoneg = FALSE; - adapter->hw.phy.autoneg_advertised = 0; - if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) - adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL; - else - adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF; - break; - case IFM_10_T: - adapter->hw.mac.autoneg = FALSE; - adapter->hw.phy.autoneg_advertised = 0; - if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) - adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL; - else - adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF; - break; - default: - device_printf(adapter->dev, "Unsupported media type\n"); - } - - lem_init_locked(adapter); - EM_CORE_UNLOCK(adapter); - - return (0); -} - -/********************************************************************* - * - * This routine maps the mbufs to tx descriptors. 
- * - * return 0 on success, positive on failure - **********************************************************************/ - -static int -lem_xmit(struct adapter *adapter, struct mbuf **m_headp) -{ - bus_dma_segment_t segs[EM_MAX_SCATTER]; - bus_dmamap_t map; - struct em_buffer *tx_buffer, *tx_buffer_mapped; - struct e1000_tx_desc *ctxd = NULL; - struct mbuf *m_head; - u32 txd_upper, txd_lower, txd_used, txd_saved; - int error, nsegs, i, j, first, last = 0; - - m_head = *m_headp; - txd_upper = txd_lower = txd_used = txd_saved = 0; - - /* - ** When doing checksum offload, it is critical to - ** make sure the first mbuf has more than header, - ** because that routine expects data to be present. - */ - if ((m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) && - (m_head->m_len < ETHER_HDR_LEN + sizeof(struct ip))) { - m_head = m_pullup(m_head, ETHER_HDR_LEN + sizeof(struct ip)); - *m_headp = m_head; - if (m_head == NULL) - return (ENOBUFS); - } - - /* - * Map the packet for DMA - * - * Capture the first descriptor index, - * this descriptor will have the index - * of the EOP which is the only one that - * now gets a DONE bit writeback. - */ - first = adapter->next_avail_tx_desc; - tx_buffer = &adapter->tx_buffer_area[first]; - tx_buffer_mapped = tx_buffer; - map = tx_buffer->map; - - error = bus_dmamap_load_mbuf_sg(adapter->txtag, map, - *m_headp, segs, &nsegs, BUS_DMA_NOWAIT); - - /* - * There are two types of errors we can (try) to handle: - * - EFBIG means the mbuf chain was too long and bus_dma ran - * out of segments. Defragment the mbuf chain and try again. - * - ENOMEM means bus_dma could not obtain enough bounce buffers - * at this point in time. Defer sending and try again later. - * All other errors, in particular EINVAL, are fatal and prevent the - * mbuf chain from ever going through. Drop it and report error. 
- */ - if (error == EFBIG) { - struct mbuf *m; - - m = m_collapse(*m_headp, M_NOWAIT, EM_MAX_SCATTER); - if (m == NULL) { - adapter->mbuf_defrag_failed++; - m_freem(*m_headp); - *m_headp = NULL; - return (ENOBUFS); - } - *m_headp = m; - - /* Try it again */ - error = bus_dmamap_load_mbuf_sg(adapter->txtag, map, - *m_headp, segs, &nsegs, BUS_DMA_NOWAIT); - - if (error) { - adapter->no_tx_dma_setup++; - m_freem(*m_headp); - *m_headp = NULL; - return (error); - } - } else if (error != 0) { - adapter->no_tx_dma_setup++; - return (error); - } - - if (adapter->num_tx_desc_avail < (nsegs + 2)) { - adapter->no_tx_desc_avail2++; - bus_dmamap_unload(adapter->txtag, map); - return (ENOBUFS); - } - m_head = *m_headp; - - /* Do hardware assists */ - if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) - lem_transmit_checksum_setup(adapter, m_head, - &txd_upper, &txd_lower); - - i = adapter->next_avail_tx_desc; - if (adapter->pcix_82544) - txd_saved = i; - - /* Set up our transmit descriptors */ - for (j = 0; j < nsegs; j++) { - bus_size_t seg_len; - bus_addr_t seg_addr; - /* If adapter is 82544 and on PCIX bus */ - if(adapter->pcix_82544) { - DESC_ARRAY desc_array; - u32 array_elements, counter; - /* - * Check the Address and Length combination and - * split the data accordingly - */ - array_elements = lem_fill_descriptors(segs[j].ds_addr, - segs[j].ds_len, &desc_array); - for (counter = 0; counter < array_elements; counter++) { - if (txd_used == adapter->num_tx_desc_avail) { - adapter->next_avail_tx_desc = txd_saved; - adapter->no_tx_desc_avail2++; - bus_dmamap_unload(adapter->txtag, map); - return (ENOBUFS); - } - tx_buffer = &adapter->tx_buffer_area[i]; - ctxd = &adapter->tx_desc_base[i]; - ctxd->buffer_addr = htole64( - desc_array.descriptor[counter].address); - ctxd->lower.data = htole32( - (adapter->txd_cmd | txd_lower | (u16) - desc_array.descriptor[counter].length)); - ctxd->upper.data = - htole32((txd_upper)); - last = i; - if (++i == adapter->num_tx_desc) - i = 0; - 
tx_buffer->m_head = NULL; - tx_buffer->next_eop = -1; - txd_used++; - } - } else { - tx_buffer = &adapter->tx_buffer_area[i]; - ctxd = &adapter->tx_desc_base[i]; - seg_addr = segs[j].ds_addr; - seg_len = segs[j].ds_len; - ctxd->buffer_addr = htole64(seg_addr); - ctxd->lower.data = htole32( - adapter->txd_cmd | txd_lower | seg_len); - ctxd->upper.data = - htole32(txd_upper); - last = i; - if (++i == adapter->num_tx_desc) - i = 0; - tx_buffer->m_head = NULL; - tx_buffer->next_eop = -1; - } - } - - adapter->next_avail_tx_desc = i; - - if (adapter->pcix_82544) - adapter->num_tx_desc_avail -= txd_used; - else - adapter->num_tx_desc_avail -= nsegs; - - if (m_head->m_flags & M_VLANTAG) { - /* Set the vlan id. */ - ctxd->upper.fields.special = - htole16(m_head->m_pkthdr.ether_vtag); - /* Tell hardware to add tag */ - ctxd->lower.data |= htole32(E1000_TXD_CMD_VLE); - } - - tx_buffer->m_head = m_head; - tx_buffer_mapped->map = tx_buffer->map; - tx_buffer->map = map; - bus_dmamap_sync(adapter->txtag, map, BUS_DMASYNC_PREWRITE); - - /* - * Last Descriptor of Packet - * needs End Of Packet (EOP) - * and Report Status (RS) - */ - ctxd->lower.data |= - htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS); - /* - * Keep track in the first buffer which - * descriptor will be written back - */ - tx_buffer = &adapter->tx_buffer_area[first]; - tx_buffer->next_eop = last; - adapter->watchdog_time = ticks; - - /* - * Advance the Transmit Descriptor Tail (TDT), this tells the E1000 - * that this frame is available to transmit. 
- */ - bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - -#ifdef NIC_SEND_COMBINING - if (adapter->sc_enable) { - if (adapter->shadow_tdt & MIT_PENDING_INT) { - /* signal intr and data pending */ - adapter->shadow_tdt = MIT_PENDING_TDT | (i & 0xffff); - return (0); - } else { - adapter->shadow_tdt = MIT_PENDING_INT; - } - } -#endif /* NIC_SEND_COMBINING */ - - if (adapter->hw.mac.type == e1000_82547 && - adapter->link_duplex == HALF_DUPLEX) - lem_82547_move_tail(adapter); - else { - E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), i); - if (adapter->hw.mac.type == e1000_82547) - lem_82547_update_fifo_head(adapter, - m_head->m_pkthdr.len); - } - - return (0); -} - -/********************************************************************* - * - * 82547 workaround to avoid controller hang in half-duplex environment. - * The workaround is to avoid queuing a large packet that would span - * the internal Tx FIFO ring boundary. We need to reset the FIFO pointers - * in this case. We do that only when FIFO is quiescent. 
- * - **********************************************************************/ -static void -lem_82547_move_tail(void *arg) -{ - struct adapter *adapter = arg; - struct e1000_tx_desc *tx_desc; - u16 hw_tdt, sw_tdt, length = 0; - bool eop = 0; - - EM_TX_LOCK_ASSERT(adapter); - - hw_tdt = E1000_READ_REG(&adapter->hw, E1000_TDT(0)); - sw_tdt = adapter->next_avail_tx_desc; - - while (hw_tdt != sw_tdt) { - tx_desc = &adapter->tx_desc_base[hw_tdt]; - length += tx_desc->lower.flags.length; - eop = tx_desc->lower.data & E1000_TXD_CMD_EOP; - if (++hw_tdt == adapter->num_tx_desc) - hw_tdt = 0; - - if (eop) { - if (lem_82547_fifo_workaround(adapter, length)) { - adapter->tx_fifo_wrk_cnt++; - callout_reset(&adapter->tx_fifo_timer, 1, - lem_82547_move_tail, adapter); - break; - } - E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), hw_tdt); - lem_82547_update_fifo_head(adapter, length); - length = 0; - } - } -} - -static int -lem_82547_fifo_workaround(struct adapter *adapter, int len) -{ - int fifo_space, fifo_pkt_len; - - fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR); - - if (adapter->link_duplex == HALF_DUPLEX) { - fifo_space = adapter->tx_fifo_size - adapter->tx_fifo_head; - - if (fifo_pkt_len >= (EM_82547_PKT_THRESH + fifo_space)) { - if (lem_82547_tx_fifo_reset(adapter)) - return (0); - else - return (1); - } - } - - return (0); -} - -static void -lem_82547_update_fifo_head(struct adapter *adapter, int len) -{ - int fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR); - - /* tx_fifo_head is always 16 byte aligned */ - adapter->tx_fifo_head += fifo_pkt_len; - if (adapter->tx_fifo_head >= adapter->tx_fifo_size) { - adapter->tx_fifo_head -= adapter->tx_fifo_size; - } -} - - -static int -lem_82547_tx_fifo_reset(struct adapter *adapter) -{ - u32 tctl; - - if ((E1000_READ_REG(&adapter->hw, E1000_TDT(0)) == - E1000_READ_REG(&adapter->hw, E1000_TDH(0))) && - (E1000_READ_REG(&adapter->hw, E1000_TDFT) == - E1000_READ_REG(&adapter->hw, E1000_TDFH)) && - 
(E1000_READ_REG(&adapter->hw, E1000_TDFTS) == - E1000_READ_REG(&adapter->hw, E1000_TDFHS)) && - (E1000_READ_REG(&adapter->hw, E1000_TDFPC) == 0)) { - /* Disable TX unit */ - tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL); - E1000_WRITE_REG(&adapter->hw, E1000_TCTL, - tctl & ~E1000_TCTL_EN); - - /* Reset FIFO pointers */ - E1000_WRITE_REG(&adapter->hw, E1000_TDFT, - adapter->tx_head_addr); - E1000_WRITE_REG(&adapter->hw, E1000_TDFH, - adapter->tx_head_addr); - E1000_WRITE_REG(&adapter->hw, E1000_TDFTS, - adapter->tx_head_addr); - E1000_WRITE_REG(&adapter->hw, E1000_TDFHS, - adapter->tx_head_addr); - - /* Re-enable TX unit */ - E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl); - E1000_WRITE_FLUSH(&adapter->hw); - - adapter->tx_fifo_head = 0; - adapter->tx_fifo_reset_cnt++; - - return (TRUE); - } - else { - return (FALSE); - } -} - -static void -lem_set_promisc(struct adapter *adapter) -{ - if_t ifp = adapter->ifp; - u32 reg_rctl; - - reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); - - if (if_getflags(ifp) & IFF_PROMISC) { - reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE); - /* Turn this on if you want to see bad packets */ - if (lem_debug_sbp) - reg_rctl |= E1000_RCTL_SBP; - E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); - } else if (if_getflags(ifp) & IFF_ALLMULTI) { - reg_rctl |= E1000_RCTL_MPE; - reg_rctl &= ~E1000_RCTL_UPE; - E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); - } -} - -static void -lem_disable_promisc(struct adapter *adapter) -{ - if_t ifp = adapter->ifp; - u32 reg_rctl; - int mcnt = 0; - - reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); - reg_rctl &= (~E1000_RCTL_UPE); - if (if_getflags(ifp) & IFF_ALLMULTI) - mcnt = MAX_NUM_MULTICAST_ADDRESSES; - else - mcnt = if_multiaddr_count(ifp, MAX_NUM_MULTICAST_ADDRESSES); - - /* Don't disable if in MAX groups */ - if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) - reg_rctl &= (~E1000_RCTL_MPE); - reg_rctl &= (~E1000_RCTL_SBP); - E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); -} - - 
-/********************************************************************* - * Multicast Update - * - * This routine is called whenever multicast address list is updated. - * - **********************************************************************/ - -static void -lem_set_multi(struct adapter *adapter) -{ - if_t ifp = adapter->ifp; - u32 reg_rctl = 0; - u8 *mta; /* Multicast array memory */ - int mcnt = 0; - - IOCTL_DEBUGOUT("lem_set_multi: begin"); - - mta = adapter->mta; - bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES); - - if (adapter->hw.mac.type == e1000_82542 && - adapter->hw.revision_id == E1000_REVISION_2) { - reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); - if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE) - e1000_pci_clear_mwi(&adapter->hw); - reg_rctl |= E1000_RCTL_RST; - E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); - msec_delay(5); - } - - if_multiaddr_array(ifp, mta, &mcnt, MAX_NUM_MULTICAST_ADDRESSES); - - if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) { - reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); - reg_rctl |= E1000_RCTL_MPE; - E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); - } else - e1000_update_mc_addr_list(&adapter->hw, mta, mcnt); - - if (adapter->hw.mac.type == e1000_82542 && - adapter->hw.revision_id == E1000_REVISION_2) { - reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); - reg_rctl &= ~E1000_RCTL_RST; - E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); - msec_delay(5); - if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE) - e1000_pci_set_mwi(&adapter->hw); - } -} - - -/********************************************************************* - * Timer routine - * - * This routine checks for link status and updates statistics. 
- * - **********************************************************************/ - -static void -lem_local_timer(void *arg) -{ - struct adapter *adapter = arg; - - EM_CORE_LOCK_ASSERT(adapter); - - lem_update_link_status(adapter); - lem_update_stats_counters(adapter); - - lem_smartspeed(adapter); - - /* - * We check the watchdog: the time since - * the last TX descriptor was cleaned. - * This implies a functional TX engine. - */ - if ((adapter->watchdog_check == TRUE) && - (ticks - adapter->watchdog_time > EM_WATCHDOG)) - goto hung; - - callout_reset(&adapter->timer, hz, lem_local_timer, adapter); - return; -hung: - device_printf(adapter->dev, "Watchdog timeout -- resetting\n"); - if_setdrvflagbits(adapter->ifp, 0, IFF_DRV_RUNNING); - adapter->watchdog_events++; - lem_init_locked(adapter); -} - -static void -lem_update_link_status(struct adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - if_t ifp = adapter->ifp; - device_t dev = adapter->dev; - u32 link_check = 0; - - /* Get the cached link value or read phy for real */ - switch (hw->phy.media_type) { - case e1000_media_type_copper: - if (hw->mac.get_link_status) { - /* Do the work to read phy */ - e1000_check_for_link(hw); - link_check = !hw->mac.get_link_status; - if (link_check) /* ESB2 fix */ - e1000_cfg_on_link_up(hw); - } else - link_check = TRUE; - break; - case e1000_media_type_fiber: - e1000_check_for_link(hw); - link_check = (E1000_READ_REG(hw, E1000_STATUS) & - E1000_STATUS_LU); - break; - case e1000_media_type_internal_serdes: - e1000_check_for_link(hw); - link_check = adapter->hw.mac.serdes_has_link; - break; - default: - case e1000_media_type_unknown: - break; - } - - /* Now check for a transition */ - if (link_check && (adapter->link_active == 0)) { - e1000_get_speed_and_duplex(hw, &adapter->link_speed, - &adapter->link_duplex); - if (bootverbose) - device_printf(dev, "Link is up %d Mbps %s\n", - adapter->link_speed, - ((adapter->link_duplex == FULL_DUPLEX) ? 
- "Full Duplex" : "Half Duplex")); - adapter->link_active = 1; - adapter->smartspeed = 0; - if_setbaudrate(ifp, adapter->link_speed * 1000000); - if_link_state_change(ifp, LINK_STATE_UP); - } else if (!link_check && (adapter->link_active == 1)) { - if_setbaudrate(ifp, 0); - adapter->link_speed = 0; - adapter->link_duplex = 0; - if (bootverbose) - device_printf(dev, "Link is Down\n"); - adapter->link_active = 0; - /* Link down, disable watchdog */ - adapter->watchdog_check = FALSE; - if_link_state_change(ifp, LINK_STATE_DOWN); - } -} - -/********************************************************************* - * - * This routine disables all traffic on the adapter by issuing a - * global reset on the MAC and deallocates TX/RX buffers. - * - * This routine should always be called with BOTH the CORE - * and TX locks. - **********************************************************************/ - -static void -lem_stop(void *arg) -{ - struct adapter *adapter = arg; - if_t ifp = adapter->ifp; - - EM_CORE_LOCK_ASSERT(adapter); - EM_TX_LOCK_ASSERT(adapter); - - INIT_DEBUGOUT("lem_stop: begin"); - - lem_disable_intr(adapter); - callout_stop(&adapter->timer); - callout_stop(&adapter->tx_fifo_timer); - - /* Tell the stack that the interface is no longer active */ - if_setdrvflagbits(ifp, 0, (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)); - - e1000_reset_hw(&adapter->hw); - if (adapter->hw.mac.type >= e1000_82544) - E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0); - - e1000_led_off(&adapter->hw); - e1000_cleanup_led(&adapter->hw); -} - - -/********************************************************************* - * - * Determine hardware revision. 
- * - **********************************************************************/ -static void -lem_identify_hardware(struct adapter *adapter) -{ - device_t dev = adapter->dev; - - /* Make sure our PCI config space has the necessary stuff set */ - pci_enable_busmaster(dev); - adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2); - - /* Save off the information about this board */ - adapter->hw.vendor_id = pci_get_vendor(dev); - adapter->hw.device_id = pci_get_device(dev); - adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1); - adapter->hw.subsystem_vendor_id = - pci_read_config(dev, PCIR_SUBVEND_0, 2); - adapter->hw.subsystem_device_id = - pci_read_config(dev, PCIR_SUBDEV_0, 2); - - /* Do Shared Code Init and Setup */ - if (e1000_set_mac_type(&adapter->hw)) { - device_printf(dev, "Setup init failure\n"); - return; - } -} - -static int -lem_allocate_pci_resources(struct adapter *adapter) -{ - device_t dev = adapter->dev; - int val, rid, error = E1000_SUCCESS; - - rid = PCIR_BAR(0); - adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY, - &rid, RF_ACTIVE); - if (adapter->memory == NULL) { - device_printf(dev, "Unable to allocate bus resource: memory\n"); - return (ENXIO); - } - adapter->osdep.mem_bus_space_tag = - rman_get_bustag(adapter->memory); - adapter->osdep.mem_bus_space_handle = - rman_get_bushandle(adapter->memory); - adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle; - - /* Only older adapters use IO mapping */ - if (adapter->hw.mac.type > e1000_82543) { - /* Figure our where our IO BAR is ? 
*/ - for (rid = PCIR_BAR(0); rid < PCIR_CIS;) { - val = pci_read_config(dev, rid, 4); - if (EM_BAR_TYPE(val) == EM_BAR_TYPE_IO) { - adapter->io_rid = rid; - break; - } - rid += 4; - /* check for 64bit BAR */ - if (EM_BAR_MEM_TYPE(val) == EM_BAR_MEM_TYPE_64BIT) - rid += 4; - } - if (rid >= PCIR_CIS) { - device_printf(dev, "Unable to locate IO BAR\n"); - return (ENXIO); - } - adapter->ioport = bus_alloc_resource_any(dev, - SYS_RES_IOPORT, &adapter->io_rid, RF_ACTIVE); - if (adapter->ioport == NULL) { - device_printf(dev, "Unable to allocate bus resource: " - "ioport\n"); - return (ENXIO); - } - adapter->hw.io_base = 0; - adapter->osdep.io_bus_space_tag = - rman_get_bustag(adapter->ioport); - adapter->osdep.io_bus_space_handle = - rman_get_bushandle(adapter->ioport); - } - - adapter->hw.back = &adapter->osdep; - - return (error); -} - -/********************************************************************* - * - * Setup the Legacy or MSI Interrupt handler - * - **********************************************************************/ -int -lem_allocate_irq(struct adapter *adapter) -{ - device_t dev = adapter->dev; - int error, rid = 0; - - /* Manually turn off all interrupts */ - E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff); - - /* We allocate a single interrupt resource */ - adapter->res[0] = bus_alloc_resource_any(dev, - SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); - if (adapter->res[0] == NULL) { - device_printf(dev, "Unable to allocate bus resource: " - "interrupt\n"); - return (ENXIO); - } - - /* Do Legacy setup? */ - if (lem_use_legacy_irq) { - if ((error = bus_setup_intr(dev, adapter->res[0], - INTR_TYPE_NET | INTR_MPSAFE, NULL, lem_intr, adapter, - &adapter->tag[0])) != 0) { - device_printf(dev, - "Failed to register interrupt handler"); - return (error); - } - return (0); - } - - /* - * Use a Fast interrupt and the associated - * deferred processing contexts. 
- */ - TASK_INIT(&adapter->rxtx_task, 0, lem_handle_rxtx, adapter); - TASK_INIT(&adapter->link_task, 0, lem_handle_link, adapter); - adapter->tq = taskqueue_create_fast("lem_taskq", M_NOWAIT, - taskqueue_thread_enqueue, &adapter->tq); - taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq", - device_get_nameunit(adapter->dev)); - if ((error = bus_setup_intr(dev, adapter->res[0], - INTR_TYPE_NET, lem_irq_fast, NULL, adapter, - &adapter->tag[0])) != 0) { - device_printf(dev, "Failed to register fast interrupt " - "handler: %d\n", error); - taskqueue_free(adapter->tq); - adapter->tq = NULL; - return (error); - } - - return (0); -} - - -static void -lem_free_pci_resources(struct adapter *adapter) -{ - device_t dev = adapter->dev; - - - if (adapter->tag[0] != NULL) { - bus_teardown_intr(dev, adapter->res[0], - adapter->tag[0]); - adapter->tag[0] = NULL; - } - - if (adapter->res[0] != NULL) { - bus_release_resource(dev, SYS_RES_IRQ, - 0, adapter->res[0]); - } - - if (adapter->memory != NULL) - bus_release_resource(dev, SYS_RES_MEMORY, - PCIR_BAR(0), adapter->memory); - - if (adapter->ioport != NULL) - bus_release_resource(dev, SYS_RES_IOPORT, - adapter->io_rid, adapter->ioport); -} - - -/********************************************************************* - * - * Initialize the hardware to a configuration - * as specified by the adapter structure. - * - **********************************************************************/ -static int -lem_hardware_init(struct adapter *adapter) -{ - device_t dev = adapter->dev; - u16 rx_buffer_size; - - INIT_DEBUGOUT("lem_hardware_init: begin"); - - /* Issue a global reset */ - e1000_reset_hw(&adapter->hw); - - /* When hardware is reset, fifo_head is also reset */ - adapter->tx_fifo_head = 0; - - /* - * These parameters control the automatic generation (Tx) and - * response (Rx) to Ethernet PAUSE frames. - * - High water mark should allow for at least two frames to be - * received after sending an XOFF. 
- * - Low water mark works best when it is very near the high water mark. - * This allows the receiver to restart by sending XON when it has - * drained a bit. Here we use an arbitrary value of 1500 which will - * restart after one full frame is pulled from the buffer. There - * could be several smaller frames in the buffer and if so they will - * not trigger the XON until their total number reduces the buffer - * by 1500. - * - The pause time is fairly large at 1000 x 512ns = 512 usec. - */ - rx_buffer_size = ((E1000_READ_REG(&adapter->hw, E1000_PBA) & - 0xffff) << 10 ); - - adapter->hw.fc.high_water = rx_buffer_size - - roundup2(adapter->max_frame_size, 1024); - adapter->hw.fc.low_water = adapter->hw.fc.high_water - 1500; - - adapter->hw.fc.pause_time = EM_FC_PAUSE_TIME; - adapter->hw.fc.send_xon = TRUE; - - /* Set Flow control, use the tunable location if sane */ - if ((lem_fc_setting >= 0) && (lem_fc_setting < 4)) - adapter->hw.fc.requested_mode = lem_fc_setting; - else - adapter->hw.fc.requested_mode = e1000_fc_none; - - if (e1000_init_hw(&adapter->hw) < 0) { - device_printf(dev, "Hardware Initialization Failed\n"); - return (EIO); - } - - e1000_check_for_link(&adapter->hw); - - return (0); -} - -/********************************************************************* - * - * Setup networking device structure and register an interface. 
- * - **********************************************************************/ -static int -lem_setup_interface(device_t dev, struct adapter *adapter) -{ - if_t ifp; - - INIT_DEBUGOUT("lem_setup_interface: begin"); - - ifp = adapter->ifp = if_gethandle(IFT_ETHER); - if (ifp == (void *)NULL) { - device_printf(dev, "can not allocate ifnet structure\n"); - return (-1); - } - if_initname(ifp, device_get_name(dev), device_get_unit(dev)); - if_setinitfn(ifp, lem_init); - if_setsoftc(ifp, adapter); - if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST); - if_setioctlfn(ifp, lem_ioctl); - if_setstartfn(ifp, lem_start); - if_setgetcounterfn(ifp, lem_get_counter); - if_setsendqlen(ifp, adapter->num_tx_desc - 1); - if_setsendqready(ifp); - - ether_ifattach(ifp, adapter->hw.mac.addr); - - if_setcapabilities(ifp, 0); - - if (adapter->hw.mac.type >= e1000_82543) { - if_setcapabilitiesbit(ifp, IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM, 0); - if_setcapenablebit(ifp, IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM, 0); - } - - /* - * Tell the upper layer(s) we support long frames. - */ - if_setifheaderlen(ifp, sizeof(struct ether_vlan_header)); - if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU, 0); - if_setcapenablebit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU, 0); - - /* - ** Dont turn this on by default, if vlans are - ** created on another pseudo device (eg. lagg) - ** then vlan events are not passed thru, breaking - ** operation, but with HW FILTER off it works. If - ** using vlans directly on the em driver you can - ** enable this and get full hardware tag filtering. 
- */ - if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER, 0); - -#ifdef DEVICE_POLLING - if_setcapabilitiesbit(ifp, IFCAP_POLLING, 0); -#endif - - /* Enable only WOL MAGIC by default */ - if (adapter->wol) { - if_setcapabilitiesbit(ifp, IFCAP_WOL, 0); - if_setcapenablebit(ifp, IFCAP_WOL_MAGIC, 0); - } - - /* - * Specify the media types supported by this adapter and register - * callbacks to update media and link information - */ - ifmedia_init(&adapter->media, IFM_IMASK, - lem_media_change, lem_media_status); - if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || - (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) { - u_char fiber_type = IFM_1000_SX; /* default type */ - - if (adapter->hw.mac.type == e1000_82545) - fiber_type = IFM_1000_LX; - ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX, - 0, NULL); - ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL); - } else { - ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL); - ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX, - 0, NULL); - ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX, - 0, NULL); - ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX, - 0, NULL); - if (adapter->hw.phy.type != e1000_phy_ife) { - ifmedia_add(&adapter->media, - IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); - ifmedia_add(&adapter->media, - IFM_ETHER | IFM_1000_T, 0, NULL); - } - } - ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); - ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO); - return (0); -} - - -/********************************************************************* - * - * Workaround for SmartSpeed on 82541 and 82547 controllers - * - **********************************************************************/ -static void -lem_smartspeed(struct adapter *adapter) -{ - u16 phy_tmp; - - if (adapter->link_active || (adapter->hw.phy.type != e1000_phy_igp) || - adapter->hw.mac.autoneg == 0 || - (adapter->hw.phy.autoneg_advertised & 
ADVERTISE_1000_FULL) == 0) - return; - - if (adapter->smartspeed == 0) { - /* If Master/Slave config fault is asserted twice, - * we assume back-to-back */ - e1000_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp); - if (!(phy_tmp & SR_1000T_MS_CONFIG_FAULT)) - return; - e1000_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp); - if (phy_tmp & SR_1000T_MS_CONFIG_FAULT) { - e1000_read_phy_reg(&adapter->hw, - PHY_1000T_CTRL, &phy_tmp); - if(phy_tmp & CR_1000T_MS_ENABLE) { - phy_tmp &= ~CR_1000T_MS_ENABLE; - e1000_write_phy_reg(&adapter->hw, - PHY_1000T_CTRL, phy_tmp); - adapter->smartspeed++; - if(adapter->hw.mac.autoneg && - !e1000_copper_link_autoneg(&adapter->hw) && - !e1000_read_phy_reg(&adapter->hw, - PHY_CONTROL, &phy_tmp)) { - phy_tmp |= (MII_CR_AUTO_NEG_EN | - MII_CR_RESTART_AUTO_NEG); - e1000_write_phy_reg(&adapter->hw, - PHY_CONTROL, phy_tmp); - } - } - } - return; - } else if(adapter->smartspeed == EM_SMARTSPEED_DOWNSHIFT) { - /* If still no link, perhaps using 2/3 pair cable */ - e1000_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp); - phy_tmp |= CR_1000T_MS_ENABLE; - e1000_write_phy_reg(&adapter->hw, PHY_1000T_CTRL, phy_tmp); - if(adapter->hw.mac.autoneg && - !e1000_copper_link_autoneg(&adapter->hw) && - !e1000_read_phy_reg(&adapter->hw, PHY_CONTROL, &phy_tmp)) { - phy_tmp |= (MII_CR_AUTO_NEG_EN | - MII_CR_RESTART_AUTO_NEG); - e1000_write_phy_reg(&adapter->hw, PHY_CONTROL, phy_tmp); - } - } - /* Restart process after EM_SMARTSPEED_MAX iterations */ - if(adapter->smartspeed++ == EM_SMARTSPEED_MAX) - adapter->smartspeed = 0; -} - - -/* - * Manage DMA'able memory. 
- */ -static void -lem_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) -{ - if (error) - return; - *(bus_addr_t *) arg = segs[0].ds_addr; -} - -static int -lem_dma_malloc(struct adapter *adapter, bus_size_t size, - struct em_dma_alloc *dma, int mapflags) -{ - int error; - - error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */ - EM_DBA_ALIGN, 0, /* alignment, bounds */ - BUS_SPACE_MAXADDR, /* lowaddr */ - BUS_SPACE_MAXADDR, /* highaddr */ - NULL, NULL, /* filter, filterarg */ - size, /* maxsize */ - 1, /* nsegments */ - size, /* maxsegsize */ - 0, /* flags */ - NULL, /* lockfunc */ - NULL, /* lockarg */ - &dma->dma_tag); - if (error) { - device_printf(adapter->dev, - "%s: bus_dma_tag_create failed: %d\n", - __func__, error); - goto fail_0; - } - - error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr, - BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map); - if (error) { - device_printf(adapter->dev, - "%s: bus_dmamem_alloc(%ju) failed: %d\n", - __func__, (uintmax_t)size, error); - goto fail_2; - } - - dma->dma_paddr = 0; - error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr, - size, lem_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT); - if (error || dma->dma_paddr == 0) { - device_printf(adapter->dev, - "%s: bus_dmamap_load failed: %d\n", - __func__, error); - goto fail_3; - } - - return (0); - -fail_3: - bus_dmamap_unload(dma->dma_tag, dma->dma_map); -fail_2: - bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); - bus_dma_tag_destroy(dma->dma_tag); -fail_0: - dma->dma_tag = NULL; - - return (error); -} - -static void -lem_dma_free(struct adapter *adapter, struct em_dma_alloc *dma) -{ - if (dma->dma_tag == NULL) - return; - if (dma->dma_paddr != 0) { - bus_dmamap_sync(dma->dma_tag, dma->dma_map, - BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); - bus_dmamap_unload(dma->dma_tag, dma->dma_map); - dma->dma_paddr = 0; - } - if (dma->dma_vaddr != NULL) { - bus_dmamem_free(dma->dma_tag, 
dma->dma_vaddr, dma->dma_map); - dma->dma_vaddr = NULL; - } - bus_dma_tag_destroy(dma->dma_tag); - dma->dma_tag = NULL; -} - - -/********************************************************************* - * - * Allocate memory for tx_buffer structures. The tx_buffer stores all - * the information needed to transmit a packet on the wire. - * - **********************************************************************/ -static int -lem_allocate_transmit_structures(struct adapter *adapter) -{ - device_t dev = adapter->dev; - struct em_buffer *tx_buffer; - int error; - - /* - * Create DMA tags for tx descriptors - */ - if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ - 1, 0, /* alignment, bounds */ - BUS_SPACE_MAXADDR, /* lowaddr */ - BUS_SPACE_MAXADDR, /* highaddr */ - NULL, NULL, /* filter, filterarg */ - MCLBYTES * EM_MAX_SCATTER, /* maxsize */ - EM_MAX_SCATTER, /* nsegments */ - MCLBYTES, /* maxsegsize */ - 0, /* flags */ - NULL, /* lockfunc */ - NULL, /* lockarg */ - &adapter->txtag)) != 0) { - device_printf(dev, "Unable to allocate TX DMA tag\n"); - goto fail; - } - - adapter->tx_buffer_area = malloc(sizeof(struct em_buffer) * - adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO); - if (adapter->tx_buffer_area == NULL) { - device_printf(dev, "Unable to allocate tx_buffer memory\n"); - error = ENOMEM; - goto fail; - } - - /* Create the descriptor buffer dma maps */ - for (int i = 0; i < adapter->num_tx_desc; i++) { - tx_buffer = &adapter->tx_buffer_area[i]; - error = bus_dmamap_create(adapter->txtag, 0, &tx_buffer->map); - if (error != 0) { - device_printf(dev, "Unable to create TX DMA map\n"); - goto fail; - } - tx_buffer->next_eop = -1; - } - - return (0); -fail: - lem_free_transmit_structures(adapter); - return (error); -} - -/********************************************************************* - * - * (Re)Initialize transmit structures. 
- * - **********************************************************************/ -static void -lem_setup_transmit_structures(struct adapter *adapter) -{ - struct em_buffer *tx_buffer; -#ifdef DEV_NETMAP - /* we are already locked */ - struct netmap_adapter *na = netmap_getna(adapter->ifp); - struct netmap_slot *slot = netmap_reset(na, NR_TX, 0, 0); -#endif /* DEV_NETMAP */ - - /* Clear the old ring contents */ - bzero(adapter->tx_desc_base, - (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc); - - /* Free any existing TX buffers */ - for (int i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) { - tx_buffer = &adapter->tx_buffer_area[i]; - bus_dmamap_sync(adapter->txtag, tx_buffer->map, - BUS_DMASYNC_POSTWRITE); - bus_dmamap_unload(adapter->txtag, tx_buffer->map); - m_freem(tx_buffer->m_head); - tx_buffer->m_head = NULL; -#ifdef DEV_NETMAP - if (slot) { - /* the i-th NIC entry goes to slot si */ - int si = netmap_idx_n2k(&na->tx_rings[0], i); - uint64_t paddr; - void *addr; - - addr = PNMB(na, slot + si, &paddr); - adapter->tx_desc_base[i].buffer_addr = htole64(paddr); - /* reload the map for netmap mode */ - netmap_load_map(na, adapter->txtag, tx_buffer->map, addr); - } -#endif /* DEV_NETMAP */ - tx_buffer->next_eop = -1; - } - - /* Reset state */ - adapter->last_hw_offload = 0; - adapter->next_avail_tx_desc = 0; - adapter->next_tx_to_clean = 0; - adapter->num_tx_desc_avail = adapter->num_tx_desc; - - bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - - return; -} - -/********************************************************************* - * - * Enable transmit unit. 
- * - **********************************************************************/ -static void -lem_initialize_transmit_unit(struct adapter *adapter) -{ - u32 tctl, tipg = 0; - u64 bus_addr; - - INIT_DEBUGOUT("lem_initialize_transmit_unit: begin"); - /* Setup the Base and Length of the Tx Descriptor Ring */ - bus_addr = adapter->txdma.dma_paddr; - E1000_WRITE_REG(&adapter->hw, E1000_TDLEN(0), - adapter->num_tx_desc * sizeof(struct e1000_tx_desc)); - E1000_WRITE_REG(&adapter->hw, E1000_TDBAH(0), - (u32)(bus_addr >> 32)); - E1000_WRITE_REG(&adapter->hw, E1000_TDBAL(0), - (u32)bus_addr); - /* Setup the HW Tx Head and Tail descriptor pointers */ - E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), 0); - E1000_WRITE_REG(&adapter->hw, E1000_TDH(0), 0); - - HW_DEBUGOUT2("Base = %x, Length = %x\n", - E1000_READ_REG(&adapter->hw, E1000_TDBAL(0)), - E1000_READ_REG(&adapter->hw, E1000_TDLEN(0))); - - /* Set the default values for the Tx Inter Packet Gap timer */ - switch (adapter->hw.mac.type) { - case e1000_82542: - tipg = DEFAULT_82542_TIPG_IPGT; - tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT; - tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT; - break; - default: - if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || - (adapter->hw.phy.media_type == - e1000_media_type_internal_serdes)) - tipg = DEFAULT_82543_TIPG_IPGT_FIBER; - else - tipg = DEFAULT_82543_TIPG_IPGT_COPPER; - tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT; - tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT; - } - - E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg); - E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value); - if(adapter->hw.mac.type >= e1000_82540) - E1000_WRITE_REG(&adapter->hw, E1000_TADV, - adapter->tx_abs_int_delay.value); - - /* Program the Transmit Control Register */ - tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL); - tctl &= ~E1000_TCTL_CT; - tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN | - 
(E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT)); - - /* This write will effectively turn on the transmit unit. */ - E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl); - - /* Setup Transmit Descriptor Base Settings */ - adapter->txd_cmd = E1000_TXD_CMD_IFCS; - - if (adapter->tx_int_delay.value > 0) - adapter->txd_cmd |= E1000_TXD_CMD_IDE; -} - -/********************************************************************* - * - * Free all transmit related data structures. - * - **********************************************************************/ -static void -lem_free_transmit_structures(struct adapter *adapter) -{ - struct em_buffer *tx_buffer; - - INIT_DEBUGOUT("free_transmit_structures: begin"); - - if (adapter->tx_buffer_area != NULL) { - for (int i = 0; i < adapter->num_tx_desc; i++) { - tx_buffer = &adapter->tx_buffer_area[i]; - if (tx_buffer->m_head != NULL) { - bus_dmamap_sync(adapter->txtag, tx_buffer->map, - BUS_DMASYNC_POSTWRITE); - bus_dmamap_unload(adapter->txtag, - tx_buffer->map); - m_freem(tx_buffer->m_head); - tx_buffer->m_head = NULL; - } else if (tx_buffer->map != NULL) - bus_dmamap_unload(adapter->txtag, - tx_buffer->map); - if (tx_buffer->map != NULL) { - bus_dmamap_destroy(adapter->txtag, - tx_buffer->map); - tx_buffer->map = NULL; - } - } - } - if (adapter->tx_buffer_area != NULL) { - free(adapter->tx_buffer_area, M_DEVBUF); - adapter->tx_buffer_area = NULL; - } - if (adapter->txtag != NULL) { - bus_dma_tag_destroy(adapter->txtag); - adapter->txtag = NULL; - } -} - -/********************************************************************* - * - * The offload context needs to be set when we transfer the first - * packet of a particular protocol (TCP/UDP). This routine has been - * enhanced to deal with inserted VLAN headers, and IPV6 (not complete) - * - * Added back the old method of keeping the current context type - * and not setting if unnecessary, as this is reported to be a - * big performance win. 
-jfv - **********************************************************************/ -static void -lem_transmit_checksum_setup(struct adapter *adapter, struct mbuf *mp, - u32 *txd_upper, u32 *txd_lower) -{ - struct e1000_context_desc *TXD = NULL; - struct em_buffer *tx_buffer; - struct ether_vlan_header *eh; - struct ip *ip = NULL; - struct ip6_hdr *ip6; - int curr_txd, ehdrlen; - u32 cmd, hdr_len, ip_hlen; - u16 etype; - u8 ipproto; - - - cmd = hdr_len = ipproto = 0; - *txd_upper = *txd_lower = 0; - curr_txd = adapter->next_avail_tx_desc; - - /* - * Determine where frame payload starts. - * Jump over vlan headers if already present, - * helpful for QinQ too. - */ - eh = mtod(mp, struct ether_vlan_header *); - if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { - etype = ntohs(eh->evl_proto); - ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; - } else { - etype = ntohs(eh->evl_encap_proto); - ehdrlen = ETHER_HDR_LEN; - } - - /* - * We only support TCP/UDP for IPv4 and IPv6 for the moment. - * TODO: Support SCTP too when it hits the tree. - */ - switch (etype) { - case ETHERTYPE_IP: - ip = (struct ip *)(mp->m_data + ehdrlen); - ip_hlen = ip->ip_hl << 2; - - /* Setup of IP header checksum. */ - if (mp->m_pkthdr.csum_flags & CSUM_IP) { - /* - * Start offset for header checksum calculation. - * End offset for header checksum calculation. - * Offset of place to put the checksum. - */ - TXD = (struct e1000_context_desc *) - &adapter->tx_desc_base[curr_txd]; - TXD->lower_setup.ip_fields.ipcss = ehdrlen; - TXD->lower_setup.ip_fields.ipcse = - htole16(ehdrlen + ip_hlen); - TXD->lower_setup.ip_fields.ipcso = - ehdrlen + offsetof(struct ip, ip_sum); - cmd |= E1000_TXD_CMD_IP; - *txd_upper |= E1000_TXD_POPTS_IXSM << 8; - } - - hdr_len = ehdrlen + ip_hlen; - ipproto = ip->ip_p; - - break; - case ETHERTYPE_IPV6: - ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); - ip_hlen = sizeof(struct ip6_hdr); /* XXX: No header stacking. */ - - /* IPv6 doesn't have a header checksum. 
*/ - - hdr_len = ehdrlen + ip_hlen; - ipproto = ip6->ip6_nxt; - break; - - default: - return; - } - - switch (ipproto) { - case IPPROTO_TCP: - if (mp->m_pkthdr.csum_flags & CSUM_TCP) { - *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D; - *txd_upper |= E1000_TXD_POPTS_TXSM << 8; - /* no need for context if already set */ - if (adapter->last_hw_offload == CSUM_TCP) - return; - adapter->last_hw_offload = CSUM_TCP; - /* - * Start offset for payload checksum calculation. - * End offset for payload checksum calculation. - * Offset of place to put the checksum. - */ - TXD = (struct e1000_context_desc *) - &adapter->tx_desc_base[curr_txd]; - TXD->upper_setup.tcp_fields.tucss = hdr_len; - TXD->upper_setup.tcp_fields.tucse = htole16(0); - TXD->upper_setup.tcp_fields.tucso = - hdr_len + offsetof(struct tcphdr, th_sum); - cmd |= E1000_TXD_CMD_TCP; - } - break; - case IPPROTO_UDP: - { - if (mp->m_pkthdr.csum_flags & CSUM_UDP) { - *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D; - *txd_upper |= E1000_TXD_POPTS_TXSM << 8; - /* no need for context if already set */ - if (adapter->last_hw_offload == CSUM_UDP) - return; - adapter->last_hw_offload = CSUM_UDP; - /* - * Start offset for header checksum calculation. - * End offset for header checksum calculation. - * Offset of place to put the checksum. 
- */ - TXD = (struct e1000_context_desc *) - &adapter->tx_desc_base[curr_txd]; - TXD->upper_setup.tcp_fields.tucss = hdr_len; - TXD->upper_setup.tcp_fields.tucse = htole16(0); - TXD->upper_setup.tcp_fields.tucso = - hdr_len + offsetof(struct udphdr, uh_sum); - } - /* Fall Thru */ - } - default: - break; - } - - if (TXD == NULL) - return; - TXD->tcp_seg_setup.data = htole32(0); - TXD->cmd_and_length = - htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd); - tx_buffer = &adapter->tx_buffer_area[curr_txd]; - tx_buffer->m_head = NULL; - tx_buffer->next_eop = -1; - - if (++curr_txd == adapter->num_tx_desc) - curr_txd = 0; - - adapter->num_tx_desc_avail--; - adapter->next_avail_tx_desc = curr_txd; -} - - -/********************************************************************** - * - * Examine each tx_buffer in the used queue. If the hardware is done - * processing the packet then free associated resources. The - * tx_buffer is put back on the free queue. - * - **********************************************************************/ -static void -lem_txeof(struct adapter *adapter) -{ - int first, last, done, num_avail; - struct em_buffer *tx_buffer; - struct e1000_tx_desc *tx_desc, *eop_desc; - if_t ifp = adapter->ifp; - - EM_TX_LOCK_ASSERT(adapter); - -#ifdef DEV_NETMAP - if (netmap_tx_irq(ifp, 0)) - return; -#endif /* DEV_NETMAP */ - if (adapter->num_tx_desc_avail == adapter->num_tx_desc) - return; - - num_avail = adapter->num_tx_desc_avail; - first = adapter->next_tx_to_clean; - tx_desc = &adapter->tx_desc_base[first]; - tx_buffer = &adapter->tx_buffer_area[first]; - last = tx_buffer->next_eop; - eop_desc = &adapter->tx_desc_base[last]; - - /* - * What this does is get the index of the - * first descriptor AFTER the EOP of the - * first packet, that way we can do the - * simple comparison on the inner while loop. 
- */ - if (++last == adapter->num_tx_desc) - last = 0; - done = last; - - bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map, - BUS_DMASYNC_POSTREAD); - - while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) { - /* We clean the range of the packet */ - while (first != done) { - tx_desc->upper.data = 0; - tx_desc->lower.data = 0; - tx_desc->buffer_addr = 0; - ++num_avail; - - if (tx_buffer->m_head) { - if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); - bus_dmamap_sync(adapter->txtag, - tx_buffer->map, - BUS_DMASYNC_POSTWRITE); - bus_dmamap_unload(adapter->txtag, - tx_buffer->map); - - m_freem(tx_buffer->m_head); - tx_buffer->m_head = NULL; - } - tx_buffer->next_eop = -1; - adapter->watchdog_time = ticks; - - if (++first == adapter->num_tx_desc) - first = 0; - - tx_buffer = &adapter->tx_buffer_area[first]; - tx_desc = &adapter->tx_desc_base[first]; - } - /* See if we can continue to the next packet */ - last = tx_buffer->next_eop; - if (last != -1) { - eop_desc = &adapter->tx_desc_base[last]; - /* Get new done point */ - if (++last == adapter->num_tx_desc) last = 0; - done = last; - } else - break; - } - bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - - adapter->next_tx_to_clean = first; - adapter->num_tx_desc_avail = num_avail; - -#ifdef NIC_SEND_COMBINING - if ((adapter->shadow_tdt & MIT_PENDING_TDT) == MIT_PENDING_TDT) { - /* a tdt write is pending, do it */ - E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), - 0xffff & adapter->shadow_tdt); - adapter->shadow_tdt = MIT_PENDING_INT; - } else { - adapter->shadow_tdt = 0; // disable - } -#endif /* NIC_SEND_COMBINING */ - /* - * If we have enough room, clear IFF_DRV_OACTIVE to - * tell the stack that it is OK to send packets. - * If there are no pending descriptors, clear the watchdog. 
- */ - if (adapter->num_tx_desc_avail > EM_TX_CLEANUP_THRESHOLD) { - if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE); - if (adapter->num_tx_desc_avail == adapter->num_tx_desc) { - adapter->watchdog_check = FALSE; - return; - } - } -} - -/********************************************************************* - * - * When Link is lost sometimes there is work still in the TX ring - * which may result in a watchdog, rather than allow that we do an - * attempted cleanup and then reinit here. Note that this has been - * seens mostly with fiber adapters. - * - **********************************************************************/ -static void -lem_tx_purge(struct adapter *adapter) -{ - if ((!adapter->link_active) && (adapter->watchdog_check)) { - EM_TX_LOCK(adapter); - lem_txeof(adapter); - EM_TX_UNLOCK(adapter); - if (adapter->watchdog_check) /* Still outstanding? */ - lem_init_locked(adapter); - } -} - -/********************************************************************* - * - * Get a buffer from system mbuf buffer pool. - * - **********************************************************************/ -static int -lem_get_buf(struct adapter *adapter, int i) -{ - struct mbuf *m; - bus_dma_segment_t segs[1]; - bus_dmamap_t map; - struct em_buffer *rx_buffer; - int error, nsegs; - - m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); - if (m == NULL) { - adapter->mbuf_cluster_failed++; - return (ENOBUFS); - } - m->m_len = m->m_pkthdr.len = MCLBYTES; - - if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN)) - m_adj(m, ETHER_ALIGN); - - /* - * Using memory from the mbuf cluster pool, invoke the - * bus_dma machinery to arrange the memory mapping. - */ - error = bus_dmamap_load_mbuf_sg(adapter->rxtag, - adapter->rx_sparemap, m, segs, &nsegs, BUS_DMA_NOWAIT); - if (error != 0) { - m_free(m); - return (error); - } - - /* If nsegs is wrong then the stack is corrupt. 
*/ - KASSERT(nsegs == 1, ("Too many segments returned!")); - - rx_buffer = &adapter->rx_buffer_area[i]; - if (rx_buffer->m_head != NULL) - bus_dmamap_unload(adapter->rxtag, rx_buffer->map); - - map = rx_buffer->map; - rx_buffer->map = adapter->rx_sparemap; - adapter->rx_sparemap = map; - bus_dmamap_sync(adapter->rxtag, rx_buffer->map, BUS_DMASYNC_PREREAD); - rx_buffer->m_head = m; - - adapter->rx_desc_base[i].buffer_addr = htole64(segs[0].ds_addr); - return (0); -} - -/********************************************************************* - * - * Allocate memory for rx_buffer structures. Since we use one - * rx_buffer per received packet, the maximum number of rx_buffer's - * that we'll need is equal to the number of receive descriptors - * that we've allocated. - * - **********************************************************************/ -static int -lem_allocate_receive_structures(struct adapter *adapter) -{ - device_t dev = adapter->dev; - struct em_buffer *rx_buffer; - int i, error; - - adapter->rx_buffer_area = malloc(sizeof(struct em_buffer) * - adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO); - if (adapter->rx_buffer_area == NULL) { - device_printf(dev, "Unable to allocate rx_buffer memory\n"); - return (ENOMEM); - } - - error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ - 1, 0, /* alignment, bounds */ - BUS_SPACE_MAXADDR, /* lowaddr */ - BUS_SPACE_MAXADDR, /* highaddr */ - NULL, NULL, /* filter, filterarg */ - MCLBYTES, /* maxsize */ - 1, /* nsegments */ - MCLBYTES, /* maxsegsize */ - 0, /* flags */ - NULL, /* lockfunc */ - NULL, /* lockarg */ - &adapter->rxtag); - if (error) { - device_printf(dev, "%s: bus_dma_tag_create failed %d\n", - __func__, error); - goto fail; - } - - /* Create the spare map (used by getbuf) */ - error = bus_dmamap_create(adapter->rxtag, 0, &adapter->rx_sparemap); - if (error) { - device_printf(dev, "%s: bus_dmamap_create failed: %d\n", - __func__, error); - goto fail; - } - - rx_buffer = adapter->rx_buffer_area; - for 
(i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) { - error = bus_dmamap_create(adapter->rxtag, 0, &rx_buffer->map); - if (error) { - device_printf(dev, "%s: bus_dmamap_create failed: %d\n", - __func__, error); - goto fail; - } - } - - return (0); - -fail: - lem_free_receive_structures(adapter); - return (error); -} - -/********************************************************************* - * - * (Re)initialize receive structures. - * - **********************************************************************/ -static int -lem_setup_receive_structures(struct adapter *adapter) -{ - struct em_buffer *rx_buffer; - int i, error; -#ifdef DEV_NETMAP - /* we are already under lock */ - struct netmap_adapter *na = netmap_getna(adapter->ifp); - struct netmap_slot *slot = netmap_reset(na, NR_RX, 0, 0); -#endif - - /* Reset descriptor ring */ - bzero(adapter->rx_desc_base, - (sizeof(struct e1000_rx_desc)) * adapter->num_rx_desc); - - /* Free current RX buffers. */ - rx_buffer = adapter->rx_buffer_area; - for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) { - if (rx_buffer->m_head != NULL) { - bus_dmamap_sync(adapter->rxtag, rx_buffer->map, - BUS_DMASYNC_POSTREAD); - bus_dmamap_unload(adapter->rxtag, rx_buffer->map); - m_freem(rx_buffer->m_head); - rx_buffer->m_head = NULL; - } - } - - /* Allocate new ones. 
*/ - for (i = 0; i < adapter->num_rx_desc; i++) { -#ifdef DEV_NETMAP - if (slot) { - /* the i-th NIC entry goes to slot si */ - int si = netmap_idx_n2k(&na->rx_rings[0], i); - uint64_t paddr; - void *addr; - - addr = PNMB(na, slot + si, &paddr); - netmap_load_map(na, adapter->rxtag, rx_buffer->map, addr); - /* Update descriptor */ - adapter->rx_desc_base[i].buffer_addr = htole64(paddr); - continue; - } -#endif /* DEV_NETMAP */ - error = lem_get_buf(adapter, i); - if (error) - return (error); - } - - /* Setup our descriptor pointers */ - adapter->next_rx_desc_to_check = 0; - bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - - return (0); -} - -/********************************************************************* - * - * Enable receive unit. - * - **********************************************************************/ - -static void -lem_initialize_receive_unit(struct adapter *adapter) -{ - if_t ifp = adapter->ifp; - u64 bus_addr; - u32 rctl, rxcsum; - - INIT_DEBUGOUT("lem_initialize_receive_unit: begin"); - - /* - * Make sure receives are disabled while setting - * up the descriptor ring - */ - rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); - E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); - - if (adapter->hw.mac.type >= e1000_82540) { - E1000_WRITE_REG(&adapter->hw, E1000_RADV, - adapter->rx_abs_int_delay.value); - /* - * Set the interrupt throttling rate. 
Value is calculated - * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns) - */ - E1000_WRITE_REG(&adapter->hw, E1000_ITR, DEFAULT_ITR); - } - - /* Setup the Base and Length of the Rx Descriptor Ring */ - bus_addr = adapter->rxdma.dma_paddr; - E1000_WRITE_REG(&adapter->hw, E1000_RDLEN(0), - adapter->num_rx_desc * sizeof(struct e1000_rx_desc)); - E1000_WRITE_REG(&adapter->hw, E1000_RDBAH(0), - (u32)(bus_addr >> 32)); - E1000_WRITE_REG(&adapter->hw, E1000_RDBAL(0), - (u32)bus_addr); - - /* Setup the Receive Control Register */ - rctl &= ~(3 << E1000_RCTL_MO_SHIFT); - rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO | - E1000_RCTL_RDMTS_HALF | - (adapter->hw.mac.mc_filter_type << E1000_RCTL_MO_SHIFT); - - /* Make sure VLAN Filters are off */ - rctl &= ~E1000_RCTL_VFE; - - if (e1000_tbi_sbp_enabled_82543(&adapter->hw)) - rctl |= E1000_RCTL_SBP; - else - rctl &= ~E1000_RCTL_SBP; - - switch (adapter->rx_buffer_len) { - default: - case 2048: - rctl |= E1000_RCTL_SZ_2048; - break; - case 4096: - rctl |= E1000_RCTL_SZ_4096 | - E1000_RCTL_BSEX | E1000_RCTL_LPE; - break; - case 8192: - rctl |= E1000_RCTL_SZ_8192 | - E1000_RCTL_BSEX | E1000_RCTL_LPE; - break; - case 16384: - rctl |= E1000_RCTL_SZ_16384 | - E1000_RCTL_BSEX | E1000_RCTL_LPE; - break; - } - - if (if_getmtu(ifp) > ETHERMTU) - rctl |= E1000_RCTL_LPE; - else - rctl &= ~E1000_RCTL_LPE; - - /* Enable 82543 Receive Checksum Offload for TCP and UDP */ - if ((adapter->hw.mac.type >= e1000_82543) && - (if_getcapenable(ifp) & IFCAP_RXCSUM)) { - rxcsum = E1000_READ_REG(&adapter->hw, E1000_RXCSUM); - rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL); - E1000_WRITE_REG(&adapter->hw, E1000_RXCSUM, rxcsum); - } - - /* Enable Receives */ - E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl); - - /* - * Setup the HW Rx Head and - * Tail Descriptor Pointers - */ - E1000_WRITE_REG(&adapter->hw, E1000_RDH(0), 0); - rctl = adapter->num_rx_desc - 1; /* default RDT value */ -#ifdef DEV_NETMAP - /* preserve buffers already made 
available to clients */ - if (if_getcapenable(ifp) & IFCAP_NETMAP) { - struct netmap_adapter *na = netmap_getna(adapter->ifp); - rctl -= nm_kr_rxspace(&na->rx_rings[0]); - } -#endif /* DEV_NETMAP */ - E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), rctl); - - return; -} - -/********************************************************************* - * - * Free receive related data structures. - * - **********************************************************************/ -static void -lem_free_receive_structures(struct adapter *adapter) -{ - struct em_buffer *rx_buffer; - int i; - - INIT_DEBUGOUT("free_receive_structures: begin"); - - if (adapter->rx_sparemap) { - bus_dmamap_destroy(adapter->rxtag, adapter->rx_sparemap); - adapter->rx_sparemap = NULL; - } - - /* Cleanup any existing buffers */ - if (adapter->rx_buffer_area != NULL) { - rx_buffer = adapter->rx_buffer_area; - for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) { - if (rx_buffer->m_head != NULL) { - bus_dmamap_sync(adapter->rxtag, rx_buffer->map, - BUS_DMASYNC_POSTREAD); - bus_dmamap_unload(adapter->rxtag, - rx_buffer->map); - m_freem(rx_buffer->m_head); - rx_buffer->m_head = NULL; - } else if (rx_buffer->map != NULL) - bus_dmamap_unload(adapter->rxtag, - rx_buffer->map); - if (rx_buffer->map != NULL) { - bus_dmamap_destroy(adapter->rxtag, - rx_buffer->map); - rx_buffer->map = NULL; - } - } - } - - if (adapter->rx_buffer_area != NULL) { - free(adapter->rx_buffer_area, M_DEVBUF); - adapter->rx_buffer_area = NULL; - } - - if (adapter->rxtag != NULL) { - bus_dma_tag_destroy(adapter->rxtag); - adapter->rxtag = NULL; - } -} - -/********************************************************************* - * - * This routine executes in interrupt context. It replenishes - * the mbufs in the descriptor and sends data which has been - * dma'ed into host memory to upper layer. - * - * We loop at most count times if count is > 0, or until done if - * count < 0. 
- * - * For polling we also now return the number of cleaned packets - *********************************************************************/ -static bool -lem_rxeof(struct adapter *adapter, int count, int *done) -{ - if_t ifp = adapter->ifp; - struct mbuf *mp; - u8 status = 0, accept_frame = 0, eop = 0; - u16 len, desc_len, prev_len_adj; - int i, rx_sent = 0; - struct e1000_rx_desc *current_desc; - -#ifdef BATCH_DISPATCH - struct mbuf *mh = NULL, *mt = NULL; -#endif /* BATCH_DISPATCH */ - EM_RX_LOCK(adapter); - -#ifdef BATCH_DISPATCH - batch_again: -#endif /* BATCH_DISPATCH */ - i = adapter->next_rx_desc_to_check; - current_desc = &adapter->rx_desc_base[i]; - bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map, - BUS_DMASYNC_POSTREAD); - -#ifdef DEV_NETMAP - if (netmap_rx_irq(ifp, 0, &rx_sent)) { - EM_RX_UNLOCK(adapter); - return (FALSE); - } -#endif /* DEV_NETMAP */ - - if (!((current_desc->status) & E1000_RXD_STAT_DD)) { - if (done != NULL) - *done = rx_sent; - EM_RX_UNLOCK(adapter); - return (FALSE); - } - - while (count != 0 && if_getdrvflags(ifp) & IFF_DRV_RUNNING) { - struct mbuf *m = NULL; - - status = current_desc->status; - if ((status & E1000_RXD_STAT_DD) == 0) { - break; - } - - mp = adapter->rx_buffer_area[i].m_head; - /* - * Can't defer bus_dmamap_sync(9) because TBI_ACCEPT - * needs to access the last received byte in the mbuf. 
- */ - bus_dmamap_sync(adapter->rxtag, adapter->rx_buffer_area[i].map, - BUS_DMASYNC_POSTREAD); - - accept_frame = 1; - prev_len_adj = 0; - desc_len = le16toh(current_desc->length); - if (status & E1000_RXD_STAT_EOP) { - count--; - eop = 1; - if (desc_len < ETHER_CRC_LEN) { - len = 0; - prev_len_adj = ETHER_CRC_LEN - desc_len; - } else - len = desc_len - ETHER_CRC_LEN; - } else { - eop = 0; - len = desc_len; - } - - if (current_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK) { - u8 last_byte; - u32 pkt_len = desc_len; - - if (adapter->fmp != NULL) - pkt_len += adapter->fmp->m_pkthdr.len; - - last_byte = *(mtod(mp, caddr_t) + desc_len - 1); - if (TBI_ACCEPT(&adapter->hw, status, - current_desc->errors, pkt_len, last_byte, - adapter->min_frame_size, adapter->max_frame_size)) { - e1000_tbi_adjust_stats_82543(&adapter->hw, - &adapter->stats, pkt_len, - adapter->hw.mac.addr, - adapter->max_frame_size); - if (len > 0) - len--; - } else - accept_frame = 0; - } - - if (accept_frame) { - if (lem_get_buf(adapter, i) != 0) { - if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); - goto discard; - } - - /* Assign correct length to the current fragment */ - mp->m_len = len; - - if (adapter->fmp == NULL) { - mp->m_pkthdr.len = len; - adapter->fmp = mp; /* Store the first mbuf */ - adapter->lmp = mp; - } else { - /* Chain mbuf's together */ - mp->m_flags &= ~M_PKTHDR; - /* - * Adjust length of previous mbuf in chain if - * we received less than 4 bytes in the last - * descriptor. 
- */ - if (prev_len_adj > 0) { - adapter->lmp->m_len -= prev_len_adj; - adapter->fmp->m_pkthdr.len -= - prev_len_adj; - } - adapter->lmp->m_next = mp; - adapter->lmp = adapter->lmp->m_next; - adapter->fmp->m_pkthdr.len += len; - } - - if (eop) { - if_setrcvif(adapter->fmp, ifp); - if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); - lem_receive_checksum(adapter, current_desc, - adapter->fmp); -#ifndef __NO_STRICT_ALIGNMENT - if (adapter->max_frame_size > - (MCLBYTES - ETHER_ALIGN) && - lem_fixup_rx(adapter) != 0) - goto skip; -#endif - if (status & E1000_RXD_STAT_VP) { - adapter->fmp->m_pkthdr.ether_vtag = - le16toh(current_desc->special); - adapter->fmp->m_flags |= M_VLANTAG; - } -#ifndef __NO_STRICT_ALIGNMENT -skip: -#endif - m = adapter->fmp; - adapter->fmp = NULL; - adapter->lmp = NULL; - } - } else { - adapter->dropped_pkts++; -discard: - /* Reuse loaded DMA map and just update mbuf chain */ - mp = adapter->rx_buffer_area[i].m_head; - mp->m_len = mp->m_pkthdr.len = MCLBYTES; - mp->m_data = mp->m_ext.ext_buf; - mp->m_next = NULL; - if (adapter->max_frame_size <= - (MCLBYTES - ETHER_ALIGN)) - m_adj(mp, ETHER_ALIGN); - if (adapter->fmp != NULL) { - m_freem(adapter->fmp); - adapter->fmp = NULL; - adapter->lmp = NULL; - } - m = NULL; - } - - /* Zero out the receive descriptors status. */ - current_desc->status = 0; - bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - - /* Advance our pointers to the next descriptor. 
*/ - if (++i == adapter->num_rx_desc) - i = 0; - /* Call into the stack */ - if (m != NULL) { -#ifdef BATCH_DISPATCH - if (adapter->batch_enable) { - if (mh == NULL) - mh = mt = m; - else - mt->m_nextpkt = m; - mt = m; - m->m_nextpkt = NULL; - rx_sent++; - current_desc = &adapter->rx_desc_base[i]; - continue; - } -#endif /* BATCH_DISPATCH */ - adapter->next_rx_desc_to_check = i; - EM_RX_UNLOCK(adapter); - if_input(ifp, m); - EM_RX_LOCK(adapter); - rx_sent++; - i = adapter->next_rx_desc_to_check; - } - current_desc = &adapter->rx_desc_base[i]; - } - adapter->next_rx_desc_to_check = i; -#ifdef BATCH_DISPATCH - if (mh) { - EM_RX_UNLOCK(adapter); - while ( (mt = mh) != NULL) { - mh = mh->m_nextpkt; - mt->m_nextpkt = NULL; - if_input(ifp, mt); - } - EM_RX_LOCK(adapter); - i = adapter->next_rx_desc_to_check; /* in case of interrupts */ - if (count > 0) - goto batch_again; - } -#endif /* BATCH_DISPATCH */ - - /* Advance the E1000's Receive Queue #0 "Tail Pointer". */ - if (--i < 0) - i = adapter->num_rx_desc - 1; - E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), i); - if (done != NULL) - *done = rx_sent; - EM_RX_UNLOCK(adapter); - return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE); -} - -#ifndef __NO_STRICT_ALIGNMENT -/* - * When jumbo frames are enabled we should realign entire payload on - * architecures with strict alignment. This is serious design mistake of 8254x - * as it nullifies DMA operations. 8254x just allows RX buffer size to be - * 2048/4096/8192/16384. What we really want is 2048 - ETHER_ALIGN to align its - * payload. On architecures without strict alignment restrictions 8254x still - * performs unaligned memory access which would reduce the performance too. - * To avoid copying over an entire frame to align, we allocate a new mbuf and - * copy ethernet header to the new mbuf. The new mbuf is prepended into the - * existing mbuf chain. 
- * - * Be aware, best performance of the 8254x is achieved only when jumbo frame is - * not used at all on architectures with strict alignment. - */ -static int -lem_fixup_rx(struct adapter *adapter) -{ - struct mbuf *m, *n; - int error; - - error = 0; - m = adapter->fmp; - if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) { - bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len); - m->m_data += ETHER_HDR_LEN; - } else { - MGETHDR(n, M_NOWAIT, MT_DATA); - if (n != NULL) { - bcopy(m->m_data, n->m_data, ETHER_HDR_LEN); - m->m_data += ETHER_HDR_LEN; - m->m_len -= ETHER_HDR_LEN; - n->m_len = ETHER_HDR_LEN; - M_MOVE_PKTHDR(n, m); - n->m_next = m; - adapter->fmp = n; - } else { - adapter->dropped_pkts++; - m_freem(adapter->fmp); - adapter->fmp = NULL; - error = ENOMEM; - } - } - - return (error); -} -#endif - -/********************************************************************* - * - * Verify that the hardware indicated that the checksum is valid. - * Inform the stack about the status of checksum so that stack - * doesn't spend time verifying the checksum. - * - *********************************************************************/ -static void -lem_receive_checksum(struct adapter *adapter, - struct e1000_rx_desc *rx_desc, struct mbuf *mp) -{ - /* 82543 or newer only */ - if ((adapter->hw.mac.type < e1000_82543) || - /* Ignore Checksum bit is set */ - (rx_desc->status & E1000_RXD_STAT_IXSM)) { - mp->m_pkthdr.csum_flags = 0; - return; - } - - if (rx_desc->status & E1000_RXD_STAT_IPCS) { - /* Did it pass? */ - if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) { - /* IP Checksum Good */ - mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED; - mp->m_pkthdr.csum_flags |= CSUM_IP_VALID; - - } else { - mp->m_pkthdr.csum_flags = 0; - } - } - - if (rx_desc->status & E1000_RXD_STAT_TCPCS) { - /* Did it pass? 
*/ - if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) { - mp->m_pkthdr.csum_flags |= - (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); - mp->m_pkthdr.csum_data = htons(0xffff); - } - } -} - -/* - * This routine is run via an vlan - * config EVENT - */ -static void -lem_register_vlan(void *arg, if_t ifp, u16 vtag) -{ - struct adapter *adapter = if_getsoftc(ifp); - u32 index, bit; - - if (if_getsoftc(ifp) != arg) /* Not our event */ - return; - - if ((vtag == 0) || (vtag > 4095)) /* Invalid ID */ - return; - - EM_CORE_LOCK(adapter); - index = (vtag >> 5) & 0x7F; - bit = vtag & 0x1F; - adapter->shadow_vfta[index] |= (1 << bit); - ++adapter->num_vlans; - /* Re-init to load the changes */ - if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) - lem_init_locked(adapter); - EM_CORE_UNLOCK(adapter); -} - -/* - * This routine is run via an vlan - * unconfig EVENT - */ -static void -lem_unregister_vlan(void *arg, if_t ifp, u16 vtag) -{ - struct adapter *adapter = if_getsoftc(ifp); - u32 index, bit; - - if (if_getsoftc(ifp) != arg) - return; - - if ((vtag == 0) || (vtag > 4095)) /* Invalid */ - return; - - EM_CORE_LOCK(adapter); - index = (vtag >> 5) & 0x7F; - bit = vtag & 0x1F; - adapter->shadow_vfta[index] &= ~(1 << bit); - --adapter->num_vlans; - /* Re-init to load the changes */ - if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) - lem_init_locked(adapter); - EM_CORE_UNLOCK(adapter); -} - -static void -lem_setup_vlan_hw_support(struct adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - u32 reg; - - /* - ** We get here thru init_locked, meaning - ** a soft reset, this has already cleared - ** the VFTA and other state, so if there - ** have been no vlan's registered do nothing. - */ - if (adapter->num_vlans == 0) - return; - - /* - ** A soft reset zero's out the VFTA, so - ** we need to repopulate it now. 
- */ - for (int i = 0; i < EM_VFTA_SIZE; i++) - if (adapter->shadow_vfta[i] != 0) - E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, - i, adapter->shadow_vfta[i]); - - reg = E1000_READ_REG(hw, E1000_CTRL); - reg |= E1000_CTRL_VME; - E1000_WRITE_REG(hw, E1000_CTRL, reg); - - /* Enable the Filter Table */ - reg = E1000_READ_REG(hw, E1000_RCTL); - reg &= ~E1000_RCTL_CFIEN; - reg |= E1000_RCTL_VFE; - E1000_WRITE_REG(hw, E1000_RCTL, reg); -} - -static void -lem_enable_intr(struct adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - u32 ims_mask = IMS_ENABLE_MASK; - - E1000_WRITE_REG(hw, E1000_IMS, ims_mask); -} - -static void -lem_disable_intr(struct adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - - E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); -} - -/* - * Bit of a misnomer, what this really means is - * to enable OS management of the system... aka - * to disable special hardware management features - */ -static void -lem_init_manageability(struct adapter *adapter) -{ - /* A shared code workaround */ - if (adapter->has_manage) { - int manc = E1000_READ_REG(&adapter->hw, E1000_MANC); - /* disable hardware interception of ARP */ - manc &= ~(E1000_MANC_ARP_EN); - E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); - } -} - -/* - * Give control back to hardware management - * controller if there is one. - */ -static void -lem_release_manageability(struct adapter *adapter) -{ - if (adapter->has_manage) { - int manc = E1000_READ_REG(&adapter->hw, E1000_MANC); - - /* re-enable hardware interception of ARP */ - manc |= E1000_MANC_ARP_EN; - E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); - } -} - -/* - * lem_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit. - * For ASF and Pass Through versions of f/w this means - * that the driver is loaded. For AMT version type f/w - * this means that the network i/f is open. 
- */ -static void -lem_get_hw_control(struct adapter *adapter) -{ - u32 ctrl_ext; - - ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); - E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, - ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); - return; -} - -/* - * lem_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit. - * For ASF and Pass Through versions of f/w this means that - * the driver is no longer loaded. For AMT versions of the - * f/w this means that the network i/f is closed. - */ -static void -lem_release_hw_control(struct adapter *adapter) -{ - u32 ctrl_ext; - - if (!adapter->has_manage) - return; - - ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); - E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, - ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD); - return; -} - -static int -lem_is_valid_ether_addr(u8 *addr) -{ - char zero_addr[6] = { 0, 0, 0, 0, 0, 0 }; - - if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) { - return (FALSE); - } - - return (TRUE); -} - -/* -** Parse the interface capabilities with regard -** to both system management and wake-on-lan for -** later use. 
-*/ -static void -lem_get_wakeup(device_t dev) -{ - struct adapter *adapter = device_get_softc(dev); - u16 eeprom_data = 0, device_id, apme_mask; - - adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw); - apme_mask = EM_EEPROM_APME; - - switch (adapter->hw.mac.type) { - case e1000_82542: - case e1000_82543: - break; - case e1000_82544: - e1000_read_nvm(&adapter->hw, - NVM_INIT_CONTROL2_REG, 1, &eeprom_data); - apme_mask = EM_82544_APME; - break; - case e1000_82546: - case e1000_82546_rev_3: - if (adapter->hw.bus.func == 1) { - e1000_read_nvm(&adapter->hw, - NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data); - break; - } else - e1000_read_nvm(&adapter->hw, - NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); - break; - default: - e1000_read_nvm(&adapter->hw, - NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); - break; - } - if (eeprom_data & apme_mask) - adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC); - /* - * We have the eeprom settings, now apply the special cases - * where the eeprom may be wrong or the board won't support - * wake on lan on a particular port - */ - device_id = pci_get_device(dev); - switch (device_id) { - case E1000_DEV_ID_82546GB_PCIE: - adapter->wol = 0; - break; - case E1000_DEV_ID_82546EB_FIBER: - case E1000_DEV_ID_82546GB_FIBER: - /* Wake events only supported on port A for dual fiber - * regardless of eeprom setting */ - if (E1000_READ_REG(&adapter->hw, E1000_STATUS) & - E1000_STATUS_FUNC_1) - adapter->wol = 0; - break; - case E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3: - /* if quad port adapter, disable WoL on all but port A */ - if (global_quad_port_a != 0) - adapter->wol = 0; - /* Reset for multiple quad port adapters */ - if (++global_quad_port_a == 4) - global_quad_port_a = 0; - break; - } - return; -} - - -/* - * Enable PCI Wake On Lan capability - */ -static void -lem_enable_wakeup(device_t dev) -{ - struct adapter *adapter = device_get_softc(dev); - if_t ifp = adapter->ifp; - u32 pmc, ctrl, ctrl_ext, rctl; - u16 status; - - if 
((pci_find_cap(dev, PCIY_PMG, &pmc) != 0)) - return; - - /* Advertise the wakeup capability */ - ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL); - ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3); - E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl); - E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN); - - /* Keep the laser running on Fiber adapters */ - if (adapter->hw.phy.media_type == e1000_media_type_fiber || - adapter->hw.phy.media_type == e1000_media_type_internal_serdes) { - ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); - ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA; - E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext); - } - - /* - ** Determine type of Wakeup: note that wol - ** is set with all bits on by default. - */ - if ((if_getcapenable(ifp) & IFCAP_WOL_MAGIC) == 0) - adapter->wol &= ~E1000_WUFC_MAG; - - if ((if_getcapenable(ifp) & IFCAP_WOL_MCAST) == 0) - adapter->wol &= ~E1000_WUFC_MC; - else { - rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); - rctl |= E1000_RCTL_MPE; - E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl); - } - - if (adapter->hw.mac.type == e1000_pchlan) { - if (lem_enable_phy_wakeup(adapter)) - return; - } else { - E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN); - E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol); - } - - - /* Request PME */ - status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2); - status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE); - if (if_getcapenable(ifp) & IFCAP_WOL) - status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE; - pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2); - - return; -} - -/* -** WOL in the newer chipset interfaces (pchlan) -** require thing to be copied into the phy -*/ -static int -lem_enable_phy_wakeup(struct adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - u32 mreg, ret = 0; - u16 preg; - - /* copy MAC RARs to PHY RARs */ - for (int i = 0; i < adapter->hw.mac.rar_entry_count; i++) { - mreg = E1000_READ_REG(hw, E1000_RAL(i)); - 
e1000_write_phy_reg(hw, BM_RAR_L(i), (u16)(mreg & 0xFFFF)); - e1000_write_phy_reg(hw, BM_RAR_M(i), - (u16)((mreg >> 16) & 0xFFFF)); - mreg = E1000_READ_REG(hw, E1000_RAH(i)); - e1000_write_phy_reg(hw, BM_RAR_H(i), (u16)(mreg & 0xFFFF)); - e1000_write_phy_reg(hw, BM_RAR_CTRL(i), - (u16)((mreg >> 16) & 0xFFFF)); - } - - /* copy MAC MTA to PHY MTA */ - for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) { - mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i); - e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF)); - e1000_write_phy_reg(hw, BM_MTA(i) + 1, - (u16)((mreg >> 16) & 0xFFFF)); - } - - /* configure PHY Rx Control register */ - e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg); - mreg = E1000_READ_REG(hw, E1000_RCTL); - if (mreg & E1000_RCTL_UPE) - preg |= BM_RCTL_UPE; - if (mreg & E1000_RCTL_MPE) - preg |= BM_RCTL_MPE; - preg &= ~(BM_RCTL_MO_MASK); - if (mreg & E1000_RCTL_MO_3) - preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT) - << BM_RCTL_MO_SHIFT); - if (mreg & E1000_RCTL_BAM) - preg |= BM_RCTL_BAM; - if (mreg & E1000_RCTL_PMCF) - preg |= BM_RCTL_PMCF; - mreg = E1000_READ_REG(hw, E1000_CTRL); - if (mreg & E1000_CTRL_RFCE) - preg |= BM_RCTL_RFCE; - e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg); - - /* enable PHY wakeup in MAC register */ - E1000_WRITE_REG(hw, E1000_WUC, - E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN); - E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol); - - /* configure and enable PHY wakeup in PHY registers */ - e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol); - e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN); - - /* activate PHY wakeup */ - ret = hw->phy.ops.acquire(hw); - if (ret) { - printf("Could not acquire PHY\n"); - return ret; - } - e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT, - (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT)); - ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg); - if (ret) { - printf("Could not read PHY page 769\n"); - goto out; - } - preg |= BM_WUC_ENABLE_BIT | 
BM_WUC_HOST_WU_BIT; - ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg); - if (ret) - printf("Could not set PHY Host Wakeup bit\n"); -out: - hw->phy.ops.release(hw); - - return ret; -} - -static void -lem_led_func(void *arg, int onoff) -{ - struct adapter *adapter = arg; - - EM_CORE_LOCK(adapter); - if (onoff) { - e1000_setup_led(&adapter->hw); - e1000_led_on(&adapter->hw); - } else { - e1000_led_off(&adapter->hw); - e1000_cleanup_led(&adapter->hw); - } - EM_CORE_UNLOCK(adapter); -} - -/********************************************************************* -* 82544 Coexistence issue workaround. -* There are 2 issues. -* 1. Transmit Hang issue. -* To detect this issue, following equation can be used... -* SIZE[3:0] + ADDR[2:0] = SUM[3:0]. -* If SUM[3:0] is in between 1 to 4, we will have this issue. -* -* 2. DAC issue. -* To detect this issue, following equation can be used... -* SIZE[3:0] + ADDR[2:0] = SUM[3:0]. -* If SUM[3:0] is in between 9 to c, we will have this issue. -* -* -* WORKAROUND: -* Make sure we do not have ending address -* as 1,2,3,4(Hang) or 9,a,b,c (DAC) -* -*************************************************************************/ -static u32 -lem_fill_descriptors (bus_addr_t address, u32 length, - PDESC_ARRAY desc_array) -{ - u32 safe_terminator; - - /* Since issue is sensitive to length and address.*/ - /* Let us first check the address...*/ - if (length <= 4) { - desc_array->descriptor[0].address = address; - desc_array->descriptor[0].length = length; - desc_array->elements = 1; - return (desc_array->elements); - } - safe_terminator = (u32)((((u32)address & 0x7) + - (length & 0xF)) & 0xF); - /* if it does not fall between 0x1 to 0x4 and 0x9 to 0xC then return */ - if (safe_terminator == 0 || - (safe_terminator > 4 && - safe_terminator < 9) || - (safe_terminator > 0xC && - safe_terminator <= 0xF)) { - desc_array->descriptor[0].address = address; - desc_array->descriptor[0].length = length; - desc_array->elements = 1; - return 
(desc_array->elements); - } - - desc_array->descriptor[0].address = address; - desc_array->descriptor[0].length = length - 4; - desc_array->descriptor[1].address = address + (length - 4); - desc_array->descriptor[1].length = 4; - desc_array->elements = 2; - return (desc_array->elements); -} - -/********************************************************************** - * - * Update the board statistics counters. - * - **********************************************************************/ -static void -lem_update_stats_counters(struct adapter *adapter) -{ - - if(adapter->hw.phy.media_type == e1000_media_type_copper || - (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) { - adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS); - adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC); - } - adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS); - adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC); - adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC); - adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL); - - adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC); - adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL); - adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC); - adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC); - adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC); - adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC); - adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC); - adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC); - adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC); - adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC); - adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64); - adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127); - adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, 
E1000_PRC255); - adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511); - adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023); - adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522); - adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC); - adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC); - adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC); - adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC); - - /* For the 64-bit byte counters the low dword must be read first. */ - /* Both registers clear on the read of the high dword */ - - adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) + - ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32); - adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) + - ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32); - - adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC); - adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC); - adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC); - adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC); - adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC); - - adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH); - adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH); - - adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR); - adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT); - adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64); - adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127); - adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255); - adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511); - adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023); - adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522); - adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC); - adapter->stats.bptc 
+= E1000_READ_REG(&adapter->hw, E1000_BPTC); - - if (adapter->hw.mac.type >= e1000_82543) { - adapter->stats.algnerrc += - E1000_READ_REG(&adapter->hw, E1000_ALGNERRC); - adapter->stats.rxerrc += - E1000_READ_REG(&adapter->hw, E1000_RXERRC); - adapter->stats.tncrs += - E1000_READ_REG(&adapter->hw, E1000_TNCRS); - adapter->stats.cexterr += - E1000_READ_REG(&adapter->hw, E1000_CEXTERR); - adapter->stats.tsctc += - E1000_READ_REG(&adapter->hw, E1000_TSCTC); - adapter->stats.tsctfc += - E1000_READ_REG(&adapter->hw, E1000_TSCTFC); - } -} - -static uint64_t -lem_get_counter(if_t ifp, ift_counter cnt) -{ - struct adapter *adapter; - - adapter = if_getsoftc(ifp); - - switch (cnt) { - case IFCOUNTER_COLLISIONS: - return (adapter->stats.colc); - case IFCOUNTER_IERRORS: - return (adapter->dropped_pkts + adapter->stats.rxerrc + - adapter->stats.crcerrs + adapter->stats.algnerrc + - adapter->stats.ruc + adapter->stats.roc + - adapter->stats.mpc + adapter->stats.cexterr); - case IFCOUNTER_OERRORS: - return (adapter->stats.ecol + adapter->stats.latecol + - adapter->watchdog_events); - default: - return (if_get_counter_default(ifp, cnt)); - } -} - -/* Export a single 32-bit register via a read-only sysctl. */ -static int -lem_sysctl_reg_handler(SYSCTL_HANDLER_ARGS) -{ - struct adapter *adapter; - u_int val; - - adapter = oidp->oid_arg1; - val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2); - return (sysctl_handle_int(oidp, &val, 0, req)); -} - -/* - * Add sysctl variables, one per statistic, to the system. 
- */ -static void -lem_add_hw_stats(struct adapter *adapter) -{ - device_t dev = adapter->dev; - - struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); - struct sysctl_oid *tree = device_get_sysctl_tree(dev); - struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); - struct e1000_hw_stats *stats = &adapter->stats; - - struct sysctl_oid *stat_node; - struct sysctl_oid_list *stat_list; - - /* Driver Statistics */ - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail", - CTLFLAG_RD, &adapter->mbuf_cluster_failed, - "Std mbuf cluster failed"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail", - CTLFLAG_RD, &adapter->mbuf_defrag_failed, - "Defragmenting mbuf chain failed"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", - CTLFLAG_RD, &adapter->dropped_pkts, - "Driver dropped packets"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", - CTLFLAG_RD, &adapter->no_tx_dma_setup, - "Driver tx dma failure in xmit"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_desc_fail1", - CTLFLAG_RD, &adapter->no_tx_desc_avail1, - "Not enough tx descriptors failure in xmit"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_desc_fail2", - CTLFLAG_RD, &adapter->no_tx_desc_avail2, - "Not enough tx descriptors failure in xmit"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns", - CTLFLAG_RD, &adapter->rx_overruns, - "RX overruns"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts", - CTLFLAG_RD, &adapter->watchdog_events, - "Watchdog timeouts"); - - SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control", - CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL, - lem_sysctl_reg_handler, "IU", - "Device Control Register"); - SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control", - CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL, - lem_sysctl_reg_handler, "IU", - "Receiver Control Register"); - SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water", - CTLFLAG_RD, &adapter->hw.fc.high_water, 0, - "Flow Control High Watermark"); - SYSCTL_ADD_UINT(ctx, child, 
OID_AUTO, "fc_low_water", - CTLFLAG_RD, &adapter->hw.fc.low_water, 0, - "Flow Control Low Watermark"); - SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "fifo_workaround", - CTLFLAG_RD, &adapter->tx_fifo_wrk_cnt, - "TX FIFO workaround events"); - SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "fifo_reset", - CTLFLAG_RD, &adapter->tx_fifo_reset_cnt, - "TX FIFO resets"); - - SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "txd_head", - CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDH(0), - lem_sysctl_reg_handler, "IU", - "Transmit Descriptor Head"); - SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "txd_tail", - CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDT(0), - lem_sysctl_reg_handler, "IU", - "Transmit Descriptor Tail"); - SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxd_head", - CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDH(0), - lem_sysctl_reg_handler, "IU", - "Receive Descriptor Head"); - SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxd_tail", - CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDT(0), - lem_sysctl_reg_handler, "IU", - "Receive Descriptor Tail"); - - - /* MAC stats get their own sub node */ - - stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", - CTLFLAG_RD, NULL, "Statistics"); - stat_list = SYSCTL_CHILDREN(stat_node); - - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll", - CTLFLAG_RD, &stats->ecol, - "Excessive collisions"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll", - CTLFLAG_RD, &stats->scc, - "Single collisions"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll", - CTLFLAG_RD, &stats->mcc, - "Multiple collisions"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll", - CTLFLAG_RD, &stats->latecol, - "Late collisions"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count", - CTLFLAG_RD, &stats->colc, - "Collision Count"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors", - CTLFLAG_RD, &adapter->stats.symerrs, - "Symbol Errors"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors", - CTLFLAG_RD, &adapter->stats.sec, 
- "Sequence Errors"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count", - CTLFLAG_RD, &adapter->stats.dc, - "Defer Count"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets", - CTLFLAG_RD, &adapter->stats.mpc, - "Missed Packets"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff", - CTLFLAG_RD, &adapter->stats.rnbc, - "Receive No Buffers"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize", - CTLFLAG_RD, &adapter->stats.ruc, - "Receive Undersize"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented", - CTLFLAG_RD, &adapter->stats.rfc, - "Fragmented Packets Received "); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize", - CTLFLAG_RD, &adapter->stats.roc, - "Oversized Packets Received"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber", - CTLFLAG_RD, &adapter->stats.rjc, - "Recevied Jabber"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs", - CTLFLAG_RD, &adapter->stats.rxerrc, - "Receive Errors"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs", - CTLFLAG_RD, &adapter->stats.crcerrs, - "CRC errors"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs", - CTLFLAG_RD, &adapter->stats.algnerrc, - "Alignment Errors"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs", - CTLFLAG_RD, &adapter->stats.cexterr, - "Collision/Carrier extension errors"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd", - CTLFLAG_RD, &adapter->stats.xonrxc, - "XON Received"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd", - CTLFLAG_RD, &adapter->stats.xontxc, - "XON Transmitted"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd", - CTLFLAG_RD, &adapter->stats.xoffrxc, - "XOFF Received"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd", - CTLFLAG_RD, &adapter->stats.xofftxc, - "XOFF Transmitted"); - - /* Packet Reception Stats */ - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd", - CTLFLAG_RD, &adapter->stats.tpr, - "Total Packets 
Received "); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd", - CTLFLAG_RD, &adapter->stats.gprc, - "Good Packets Received"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd", - CTLFLAG_RD, &adapter->stats.bprc, - "Broadcast Packets Received"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd", - CTLFLAG_RD, &adapter->stats.mprc, - "Multicast Packets Received"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64", - CTLFLAG_RD, &adapter->stats.prc64, - "64 byte frames received "); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127", - CTLFLAG_RD, &adapter->stats.prc127, - "65-127 byte frames received"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255", - CTLFLAG_RD, &adapter->stats.prc255, - "128-255 byte frames received"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511", - CTLFLAG_RD, &adapter->stats.prc511, - "256-511 byte frames received"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023", - CTLFLAG_RD, &adapter->stats.prc1023, - "512-1023 byte frames received"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522", - CTLFLAG_RD, &adapter->stats.prc1522, - "1023-1522 byte frames received"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", - CTLFLAG_RD, &adapter->stats.gorc, - "Good Octets Received"); - - /* Packet Transmission Stats */ - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", - CTLFLAG_RD, &adapter->stats.gotc, - "Good Octets Transmitted"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd", - CTLFLAG_RD, &adapter->stats.tpt, - "Total Packets Transmitted"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd", - CTLFLAG_RD, &adapter->stats.gptc, - "Good Packets Transmitted"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd", - CTLFLAG_RD, &adapter->stats.bptc, - "Broadcast Packets Transmitted"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd", - 
CTLFLAG_RD, &adapter->stats.mptc, - "Multicast Packets Transmitted"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64", - CTLFLAG_RD, &adapter->stats.ptc64, - "64 byte frames transmitted "); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127", - CTLFLAG_RD, &adapter->stats.ptc127, - "65-127 byte frames transmitted"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255", - CTLFLAG_RD, &adapter->stats.ptc255, - "128-255 byte frames transmitted"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511", - CTLFLAG_RD, &adapter->stats.ptc511, - "256-511 byte frames transmitted"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023", - CTLFLAG_RD, &adapter->stats.ptc1023, - "512-1023 byte frames transmitted"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522", - CTLFLAG_RD, &adapter->stats.ptc1522, - "1024-1522 byte frames transmitted"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd", - CTLFLAG_RD, &adapter->stats.tsctc, - "TSO Contexts Transmitted"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail", - CTLFLAG_RD, &adapter->stats.tsctfc, - "TSO Contexts Failed"); -} - -/********************************************************************** - * - * This routine provides a way to dump out the adapter eeprom, - * often a useful debug/service tool. This only dumps the first - * 32 words, stuff that matters is in that extent. - * - **********************************************************************/ - -static int -lem_sysctl_nvm_info(SYSCTL_HANDLER_ARGS) -{ - struct adapter *adapter; - int error; - int result; - - result = -1; - error = sysctl_handle_int(oidp, &result, 0, req); - - if (error || !req->newptr) - return (error); - - /* - * This value will cause a hex dump of the - * first 32 16-bit words of the EEPROM to - * the screen. 
- */ - if (result == 1) { - adapter = (struct adapter *)arg1; - lem_print_nvm_info(adapter); - } - - return (error); -} - -static void -lem_print_nvm_info(struct adapter *adapter) -{ - u16 eeprom_data; - int i, j, row = 0; - - /* Its a bit crude, but it gets the job done */ - printf("\nInterface EEPROM Dump:\n"); - printf("Offset\n0x0000 "); - for (i = 0, j = 0; i < 32; i++, j++) { - if (j == 8) { /* Make the offset block */ - j = 0; ++row; - printf("\n0x00%x0 ",row); - } - e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data); - printf("%04x ", eeprom_data); - } - printf("\n"); -} - -static int -lem_sysctl_int_delay(SYSCTL_HANDLER_ARGS) -{ - struct em_int_delay_info *info; - struct adapter *adapter; - u32 regval; - int error; - int usecs; - int ticks; - - info = (struct em_int_delay_info *)arg1; - usecs = info->value; - error = sysctl_handle_int(oidp, &usecs, 0, req); - if (error != 0 || req->newptr == NULL) - return (error); - if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535)) - return (EINVAL); - info->value = usecs; - ticks = EM_USECS_TO_TICKS(usecs); - if (info->offset == E1000_ITR) /* units are 256ns here */ - ticks *= 4; - - adapter = info->adapter; - - EM_CORE_LOCK(adapter); - regval = E1000_READ_OFFSET(&adapter->hw, info->offset); - regval = (regval & ~0xffff) | (ticks & 0xffff); - /* Handle a few special cases. */ - switch (info->offset) { - case E1000_RDTR: - break; - case E1000_TIDV: - if (ticks == 0) { - adapter->txd_cmd &= ~E1000_TXD_CMD_IDE; - /* Don't write 0 into the TIDV register. 
*/ - regval++; - } else - adapter->txd_cmd |= E1000_TXD_CMD_IDE; - break; - } - E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval); - EM_CORE_UNLOCK(adapter); - return (0); -} - -static void -lem_add_int_delay_sysctl(struct adapter *adapter, const char *name, - const char *description, struct em_int_delay_info *info, - int offset, int value) -{ - info->adapter = adapter; - info->offset = offset; - info->value = value; - SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev), - SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), - OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, - info, 0, lem_sysctl_int_delay, "I", description); -} - -static void -lem_set_flow_cntrl(struct adapter *adapter, const char *name, - const char *description, int *limit, int value) -{ - *limit = value; - SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev), - SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), - OID_AUTO, name, CTLFLAG_RW, limit, value, description); -} - -static void -lem_add_rx_process_limit(struct adapter *adapter, const char *name, - const char *description, int *limit, int value) -{ - *limit = value; - SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev), - SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), - OID_AUTO, name, CTLFLAG_RW, limit, value, description); -} diff --git a/freebsd/sys/dev/e1000/if_lem.h b/freebsd/sys/dev/e1000/if_lem.h deleted file mode 100644 index 4a27c34b..00000000 --- a/freebsd/sys/dev/e1000/if_lem.h +++ /dev/null @@ -1,519 +0,0 @@ -/****************************************************************************** - - Copyright (c) 2001-2015, Intel Corporation - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - 2. 
Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - 3. Neither the name of the Intel Corporation nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE. - -******************************************************************************/ -/*$FreeBSD$*/ - - -#ifndef _LEM_H_DEFINED_ -#define _LEM_H_DEFINED_ - - -/* Tunables */ - -/* - * EM_TXD: Maximum number of Transmit Descriptors - * Valid Range: 80-256 for 82542 and 82543-based adapters - * 80-4096 for others - * Default Value: 256 - * This value is the number of transmit descriptors allocated by the driver. - * Increasing this value allows the driver to queue more transmits. Each - * descriptor is 16 bytes. - * Since TDLEN should be multiple of 128bytes, the number of transmit - * desscriptors should meet the following condition. 
- * (num_tx_desc * sizeof(struct e1000_tx_desc)) % 128 == 0 - */ -#define EM_MIN_TXD 80 -#define EM_MAX_TXD_82543 256 -#define EM_MAX_TXD 4096 -#define EM_DEFAULT_TXD EM_MAX_TXD_82543 - -/* - * EM_RXD - Maximum number of receive Descriptors - * Valid Range: 80-256 for 82542 and 82543-based adapters - * 80-4096 for others - * Default Value: 256 - * This value is the number of receive descriptors allocated by the driver. - * Increasing this value allows the driver to buffer more incoming packets. - * Each descriptor is 16 bytes. A receive buffer is also allocated for each - * descriptor. The maximum MTU size is 16110. - * Since TDLEN should be multiple of 128bytes, the number of transmit - * desscriptors should meet the following condition. - * (num_tx_desc * sizeof(struct e1000_tx_desc)) % 128 == 0 - */ -#define EM_MIN_RXD 80 -#define EM_MAX_RXD_82543 256 -#define EM_MAX_RXD 4096 -#define EM_DEFAULT_RXD EM_MAX_RXD_82543 - -/* - * EM_TIDV - Transmit Interrupt Delay Value - * Valid Range: 0-65535 (0=off) - * Default Value: 64 - * This value delays the generation of transmit interrupts in units of - * 1.024 microseconds. Transmit interrupt reduction can improve CPU - * efficiency if properly tuned for specific network traffic. If the - * system is reporting dropped transmits, this value may be set too high - * causing the driver to run out of available transmit descriptors. - */ -#define EM_TIDV 64 - -/* - * EM_TADV - Transmit Absolute Interrupt Delay Value - * (Not valid for 82542/82543/82544) - * Valid Range: 0-65535 (0=off) - * Default Value: 64 - * This value, in units of 1.024 microseconds, limits the delay in which a - * transmit interrupt is generated. Useful only if EM_TIDV is non-zero, - * this value ensures that an interrupt is generated after the initial - * packet is sent on the wire within the set amount of time. Proper tuning, - * along with EM_TIDV, may improve traffic throughput in specific - * network conditions. 
- */ -#define EM_TADV 64 - -/* - * EM_RDTR - Receive Interrupt Delay Timer (Packet Timer) - * Valid Range: 0-65535 (0=off) - * Default Value: 0 - * This value delays the generation of receive interrupts in units of 1.024 - * microseconds. Receive interrupt reduction can improve CPU efficiency if - * properly tuned for specific network traffic. Increasing this value adds - * extra latency to frame reception and can end up decreasing the throughput - * of TCP traffic. If the system is reporting dropped receives, this value - * may be set too high, causing the driver to run out of available receive - * descriptors. - * - * CAUTION: When setting EM_RDTR to a value other than 0, adapters - * may hang (stop transmitting) under certain network conditions. - * If this occurs a WATCHDOG message is logged in the system - * event log. In addition, the controller is automatically reset, - * restoring the network connection. To eliminate the potential - * for the hang ensure that EM_RDTR is set to 0. - */ -#define EM_RDTR 0 - -/* - * Receive Interrupt Absolute Delay Timer (Not valid for 82542/82543/82544) - * Valid Range: 0-65535 (0=off) - * Default Value: 64 - * This value, in units of 1.024 microseconds, limits the delay in which a - * receive interrupt is generated. Useful only if EM_RDTR is non-zero, - * this value ensures that an interrupt is generated after the initial - * packet is received within the set amount of time. Proper tuning, - * along with EM_RDTR, may improve traffic throughput in specific network - * conditions. - */ -#define EM_RADV 64 - -/* - * This parameter controls the max duration of transmit watchdog. - */ -#define EM_WATCHDOG (10 * hz) - -/* - * This parameter controls when the driver calls the routine to reclaim - * transmit descriptors. - */ -#define EM_TX_CLEANUP_THRESHOLD (adapter->num_tx_desc / 8) -#define EM_TX_OP_THRESHOLD (adapter->num_tx_desc / 32) - -/* - * This parameter controls whether or not autonegotation is enabled. 
- * 0 - Disable autonegotiation - * 1 - Enable autonegotiation - */ -#define DO_AUTO_NEG 1 - -/* - * This parameter control whether or not the driver will wait for - * autonegotiation to complete. - * 1 - Wait for autonegotiation to complete - * 0 - Don't wait for autonegotiation to complete - */ -#define WAIT_FOR_AUTO_NEG_DEFAULT 0 - -/* Tunables -- End */ - -#define AUTONEG_ADV_DEFAULT (ADVERTISE_10_HALF | ADVERTISE_10_FULL | \ - ADVERTISE_100_HALF | ADVERTISE_100_FULL | \ - ADVERTISE_1000_FULL) - -#define AUTO_ALL_MODES 0 - -/* PHY master/slave setting */ -#define EM_MASTER_SLAVE e1000_ms_hw_default - -/* - * Micellaneous constants - */ -#define EM_VENDOR_ID 0x8086 -#define EM_FLASH 0x0014 - -#define EM_JUMBO_PBA 0x00000028 -#define EM_DEFAULT_PBA 0x00000030 -#define EM_SMARTSPEED_DOWNSHIFT 3 -#define EM_SMARTSPEED_MAX 15 -#define EM_MAX_LOOP 10 - -#define MAX_NUM_MULTICAST_ADDRESSES 128 -#define PCI_ANY_ID (~0U) -#define ETHER_ALIGN 2 -#define EM_FC_PAUSE_TIME 0x0680 -#define EM_EEPROM_APME 0x400; -#define EM_82544_APME 0x0004; - -/* Code compatilbility between 6 and 7 */ -#ifndef ETHER_BPF_MTAP -#define ETHER_BPF_MTAP BPF_MTAP -#endif - -/* - * TDBA/RDBA should be aligned on 16 byte boundary. But TDLEN/RDLEN should be - * multiple of 128 bytes. So we align TDBA/RDBA on 128 byte boundary. This will - * also optimize cache line size effect. H/W supports up to cache line size 128. 
- */ -#define EM_DBA_ALIGN 128 - -#define SPEED_MODE_BIT (1<<21) /* On PCI-E MACs only */ - -/* PCI Config defines */ -#define EM_BAR_TYPE(v) ((v) & EM_BAR_TYPE_MASK) -#define EM_BAR_TYPE_MASK 0x00000001 -#define EM_BAR_TYPE_MMEM 0x00000000 -#define EM_BAR_TYPE_IO 0x00000001 -#define EM_BAR_TYPE_FLASH 0x0014 -#define EM_BAR_MEM_TYPE(v) ((v) & EM_BAR_MEM_TYPE_MASK) -#define EM_BAR_MEM_TYPE_MASK 0x00000006 -#define EM_BAR_MEM_TYPE_32BIT 0x00000000 -#define EM_BAR_MEM_TYPE_64BIT 0x00000004 -#define EM_MSIX_BAR 3 /* On 82575 */ - -#if __FreeBSD_version < 900000 -#define SYSCTL_ADD_UQUAD SYSCTL_ADD_QUAD -#endif - -/* Defines for printing debug information */ -#define DEBUG_INIT 0 -#define DEBUG_IOCTL 0 -#define DEBUG_HW 0 - -#define INIT_DEBUGOUT(S) if (DEBUG_INIT) printf(S "\n") -#define INIT_DEBUGOUT1(S, A) if (DEBUG_INIT) printf(S "\n", A) -#define INIT_DEBUGOUT2(S, A, B) if (DEBUG_INIT) printf(S "\n", A, B) -#define IOCTL_DEBUGOUT(S) if (DEBUG_IOCTL) printf(S "\n") -#define IOCTL_DEBUGOUT1(S, A) if (DEBUG_IOCTL) printf(S "\n", A) -#define IOCTL_DEBUGOUT2(S, A, B) if (DEBUG_IOCTL) printf(S "\n", A, B) -#define HW_DEBUGOUT(S) if (DEBUG_HW) printf(S "\n") -#define HW_DEBUGOUT1(S, A) if (DEBUG_HW) printf(S "\n", A) -#define HW_DEBUGOUT2(S, A, B) if (DEBUG_HW) printf(S "\n", A, B) - -#define EM_MAX_SCATTER 40 -#define EM_VFTA_SIZE 128 -#define EM_MSIX_MASK 0x01F00000 /* For 82574 use */ -#define ETH_ZLEN 60 -#define ETH_ADDR_LEN 6 -#define CSUM_OFFLOAD 7 /* Offload bits in mbuf flag */ - -/* - * 82574 has a nonstandard address for EIAC - * and since its only used in MSIX, and in - * the em driver only 82574 uses MSIX we can - * solve it just using this define. 
- */ -#define EM_EIAC 0x000DC - -/* Used in for 82547 10Mb Half workaround */ -#define EM_PBA_BYTES_SHIFT 0xA -#define EM_TX_HEAD_ADDR_SHIFT 7 -#define EM_PBA_TX_MASK 0xFFFF0000 -#define EM_FIFO_HDR 0x10 -#define EM_82547_PKT_THRESH 0x3e0 - -/* Precision Time Sync (IEEE 1588) defines */ -#define ETHERTYPE_IEEE1588 0x88F7 -#define PICOSECS_PER_TICK 20833 -#define TSYNC_PORT 319 /* UDP port for the protocol */ - -#ifdef NIC_PARAVIRT -#define E1000_PARA_SUBDEV 0x1101 /* special id */ -#define E1000_CSBAL 0x02830 /* csb phys. addr. low */ -#define E1000_CSBAH 0x02834 /* csb phys. addr. hi */ -#include -#endif /* NIC_PARAVIRT */ - -/* - * Bus dma allocation structure used by - * e1000_dma_malloc and e1000_dma_free. - */ -struct em_dma_alloc { - bus_addr_t dma_paddr; - caddr_t dma_vaddr; - bus_dma_tag_t dma_tag; - bus_dmamap_t dma_map; - bus_dma_segment_t dma_seg; - int dma_nseg; -}; - -struct adapter; - -struct em_int_delay_info { - struct adapter *adapter; /* Back-pointer to the adapter struct */ - int offset; /* Register offset to read/write */ - int value; /* Current value in usecs */ -}; - -/* Our adapter structure */ -struct adapter { - if_t ifp; - struct e1000_hw hw; - - /* FreeBSD operating-system-specific structures. 
*/ - struct e1000_osdep osdep; - device_t dev; - struct cdev *led_dev; - - struct resource *memory; - struct resource *flash; - struct resource *msix; - - struct resource *ioport; - int io_rid; - - /* 82574 may use 3 int vectors */ - struct resource *res[3]; - void *tag[3]; - int rid[3]; - - struct ifmedia media; - struct callout timer; - struct callout tx_fifo_timer; - bool watchdog_check; - int watchdog_time; - int msi; - int if_flags; - int max_frame_size; - int min_frame_size; - struct mtx core_mtx; - struct mtx tx_mtx; - struct mtx rx_mtx; - int em_insert_vlan_header; - - /* Task for FAST handling */ - struct task link_task; - struct task rxtx_task; - struct task rx_task; - struct task tx_task; - struct taskqueue *tq; /* private task queue */ - - eventhandler_tag vlan_attach; - eventhandler_tag vlan_detach; - u32 num_vlans; - - /* Management and WOL features */ - u32 wol; - bool has_manage; - bool has_amt; - - /* Multicast array memory */ - u8 *mta; - - /* - ** Shadow VFTA table, this is needed because - ** the real vlan filter table gets cleared during - ** a soft reset and the driver needs to be able - ** to repopulate it. - */ - u32 shadow_vfta[EM_VFTA_SIZE]; - - /* Info about the interface */ - uint8_t link_active; - uint16_t link_speed; - uint16_t link_duplex; - uint32_t smartspeed; - uint32_t fc_setting; - - struct em_int_delay_info tx_int_delay; - struct em_int_delay_info tx_abs_int_delay; - struct em_int_delay_info rx_int_delay; - struct em_int_delay_info rx_abs_int_delay; - struct em_int_delay_info tx_itr; - - /* - * Transmit definitions - * - * We have an array of num_tx_desc descriptors (handled - * by the controller) paired with an array of tx_buffers - * (at tx_buffer_area). - * The index of the next available descriptor is next_avail_tx_desc. - * The number of remaining tx_desc is num_tx_desc_avail. 
- */ - struct em_dma_alloc txdma; /* bus_dma glue for tx desc */ - struct e1000_tx_desc *tx_desc_base; - uint32_t next_avail_tx_desc; - uint32_t next_tx_to_clean; - volatile uint16_t num_tx_desc_avail; - uint16_t num_tx_desc; - uint16_t last_hw_offload; - uint32_t txd_cmd; - struct em_buffer *tx_buffer_area; - bus_dma_tag_t txtag; /* dma tag for tx */ - uint32_t tx_tso; /* last tx was tso */ - - /* - * Receive definitions - * - * we have an array of num_rx_desc rx_desc (handled by the - * controller), and paired with an array of rx_buffers - * (at rx_buffer_area). - * The next pair to check on receive is at offset next_rx_desc_to_check - */ - struct em_dma_alloc rxdma; /* bus_dma glue for rx desc */ - struct e1000_rx_desc *rx_desc_base; - uint32_t next_rx_desc_to_check; - uint32_t rx_buffer_len; - uint16_t num_rx_desc; - int rx_process_limit; - struct em_buffer *rx_buffer_area; - bus_dma_tag_t rxtag; - bus_dmamap_t rx_sparemap; - - /* - * First/last mbuf pointers, for - * collecting multisegment RX packets. 
- */ - struct mbuf *fmp; - struct mbuf *lmp; - - /* Misc stats maintained by the driver */ - unsigned long dropped_pkts; - unsigned long link_irq; - unsigned long mbuf_cluster_failed; - unsigned long mbuf_defrag_failed; - unsigned long no_tx_desc_avail1; - unsigned long no_tx_desc_avail2; - unsigned long no_tx_dma_setup; - unsigned long no_tx_map_avail; - unsigned long watchdog_events; - unsigned long rx_irq; - unsigned long rx_overruns; - unsigned long tx_irq; - - /* 82547 workaround */ - uint32_t tx_fifo_size; - uint32_t tx_fifo_head; - uint32_t tx_fifo_head_addr; - uint64_t tx_fifo_reset_cnt; - uint64_t tx_fifo_wrk_cnt; - uint32_t tx_head_addr; - - /* For 82544 PCIX Workaround */ - boolean_t pcix_82544; - boolean_t in_detach; - -#ifdef NIC_SEND_COMBINING - /* 0 = idle; 1xxxx int-pending; 3xxxx int + d pending + tdt */ -#define MIT_PENDING_INT 0x10000 /* pending interrupt */ -#define MIT_PENDING_TDT 0x30000 /* both intr and tdt write are pending */ - uint32_t shadow_tdt; - uint32_t sc_enable; -#endif /* NIC_SEND_COMBINING */ -#ifdef BATCH_DISPATCH - uint32_t batch_enable; -#endif /* BATCH_DISPATCH */ - -#ifdef NIC_PARAVIRT - struct em_dma_alloc csb_mem; /* phys address */ - struct paravirt_csb *csb; /* virtual addr */ - uint32_t rx_retries; /* optimize rx loop */ - uint32_t tdt_csb_count;// XXX stat - uint32_t tdt_reg_count;// XXX stat - uint32_t tdt_int_count;// XXX stat - uint32_t guest_need_kick_count;// XXX stat -#endif /* NIC_PARAVIRT */ - - struct e1000_hw_stats stats; -}; - -/* ****************************************************************************** - * vendor_info_array - * - * This array contains the list of Subvendor/Subdevice IDs on which the driver - * should load. 
- * - * ******************************************************************************/ -typedef struct _em_vendor_info_t { - unsigned int vendor_id; - unsigned int device_id; - unsigned int subvendor_id; - unsigned int subdevice_id; - unsigned int index; -} em_vendor_info_t; - -struct em_buffer { - int next_eop; /* Index of the desc to watch */ - struct mbuf *m_head; - bus_dmamap_t map; /* bus_dma map for packet */ -}; - -/* For 82544 PCIX Workaround */ -typedef struct _ADDRESS_LENGTH_PAIR -{ - uint64_t address; - uint32_t length; -} ADDRESS_LENGTH_PAIR, *PADDRESS_LENGTH_PAIR; - -typedef struct _DESCRIPTOR_PAIR -{ - ADDRESS_LENGTH_PAIR descriptor[4]; - uint32_t elements; -} DESC_ARRAY, *PDESC_ARRAY; - -#define EM_CORE_LOCK_INIT(_sc, _name) \ - mtx_init(&(_sc)->core_mtx, _name, "EM Core Lock", MTX_DEF) -#define EM_TX_LOCK_INIT(_sc, _name) \ - mtx_init(&(_sc)->tx_mtx, _name, "EM TX Lock", MTX_DEF) -#define EM_RX_LOCK_INIT(_sc, _name) \ - mtx_init(&(_sc)->rx_mtx, _name, "EM RX Lock", MTX_DEF) -#define EM_CORE_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->core_mtx) -#define EM_TX_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->tx_mtx) -#define EM_RX_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->rx_mtx) -#define EM_CORE_LOCK(_sc) mtx_lock(&(_sc)->core_mtx) -#define EM_TX_LOCK(_sc) mtx_lock(&(_sc)->tx_mtx) -#define EM_TX_TRYLOCK(_sc) mtx_trylock(&(_sc)->tx_mtx) -#define EM_RX_LOCK(_sc) mtx_lock(&(_sc)->rx_mtx) -#define EM_CORE_UNLOCK(_sc) mtx_unlock(&(_sc)->core_mtx) -#define EM_TX_UNLOCK(_sc) mtx_unlock(&(_sc)->tx_mtx) -#define EM_RX_UNLOCK(_sc) mtx_unlock(&(_sc)->rx_mtx) -#define EM_CORE_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->core_mtx, MA_OWNED) -#define EM_TX_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->tx_mtx, MA_OWNED) - -#endif /* _LEM_H_DEFINED_ */ diff --git a/freebsd/sys/dev/e1000/igb_txrx.c b/freebsd/sys/dev/e1000/igb_txrx.c new file mode 100644 index 00000000..2ed24e2d --- /dev/null +++ b/freebsd/sys/dev/e1000/igb_txrx.c @@ -0,0 +1,586 @@ +#include + +/*- + * Copyright (c) 2016 Matt Macy + * All 
rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* $FreeBSD$ */ +#include "if_em.h" + +#ifdef RSS +#include +#include +#endif + +#ifdef VERBOSE_DEBUG +#define DPRINTF device_printf +#else +#define DPRINTF(...) 
+#endif + +/********************************************************************* + * Local Function prototypes + *********************************************************************/ +static int igb_isc_txd_encap(void *arg, if_pkt_info_t pi); +static void igb_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx); +static int igb_isc_txd_credits_update(void *arg, uint16_t txqid, bool clear); + +static void igb_isc_rxd_refill(void *arg, if_rxd_update_t iru); + +static void igb_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused, qidx_t pidx); +static int igb_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget); + +static int igb_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri); + +static int igb_tx_ctx_setup(struct tx_ring *txr, if_pkt_info_t pi, u32 *cmd_type_len, u32 *olinfo_status); +static int igb_tso_setup(struct tx_ring *txr, if_pkt_info_t pi, u32 *cmd_type_len, u32 *olinfo_status); + +static void igb_rx_checksum(u32 staterr, if_rxd_info_t ri, u32 ptype); +static int igb_determine_rsstype(u16 pkt_info); + +extern void igb_if_enable_intr(if_ctx_t ctx); +extern int em_intr(void *arg); + +struct if_txrx igb_txrx = { + igb_isc_txd_encap, + igb_isc_txd_flush, + igb_isc_txd_credits_update, + igb_isc_rxd_available, + igb_isc_rxd_pkt_get, + igb_isc_rxd_refill, + igb_isc_rxd_flush, + em_intr +}; + +extern if_shared_ctx_t em_sctx; + +/********************************************************************** + * + * Setup work for hardware segmentation offload (TSO) on + * adapters using advanced tx descriptors + * + **********************************************************************/ +static int +igb_tso_setup(struct tx_ring *txr, if_pkt_info_t pi, u32 *cmd_type_len, u32 *olinfo_status) +{ + struct e1000_adv_tx_context_desc *TXD; + struct adapter *adapter = txr->adapter; + u32 type_tucmd_mlhl = 0, vlan_macip_lens = 0; + u32 mss_l4len_idx = 0; + u32 paylen; + + switch(pi->ipi_etype) { + case ETHERTYPE_IPV6: + type_tucmd_mlhl |= 
E1000_ADVTXD_TUCMD_IPV6; + break; + case ETHERTYPE_IP: + type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4; + /* Tell transmit desc to also do IPv4 checksum. */ + *olinfo_status |= E1000_TXD_POPTS_IXSM << 8; + break; + default: + panic("%s: CSUM_TSO but no supported IP version (0x%04x)", + __func__, ntohs(pi->ipi_etype)); + break; + } + + TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[pi->ipi_pidx]; + + /* This is used in the transmit desc in encap */ + paylen = pi->ipi_len - pi->ipi_ehdrlen - pi->ipi_ip_hlen - pi->ipi_tcp_hlen; + + /* VLAN MACLEN IPLEN */ + if (pi->ipi_mflags & M_VLANTAG) { + vlan_macip_lens |= (pi->ipi_vtag << E1000_ADVTXD_VLAN_SHIFT); + } + + vlan_macip_lens |= pi->ipi_ehdrlen << E1000_ADVTXD_MACLEN_SHIFT; + vlan_macip_lens |= pi->ipi_ip_hlen; + TXD->vlan_macip_lens = htole32(vlan_macip_lens); + + /* ADV DTYPE TUCMD */ + type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT; + type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP; + TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); + + /* MSS L4LEN IDX */ + mss_l4len_idx |= (pi->ipi_tso_segsz << E1000_ADVTXD_MSS_SHIFT); + mss_l4len_idx |= (pi->ipi_tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT); + /* 82575 needs the queue index added */ + if (adapter->hw.mac.type == e1000_82575) + mss_l4len_idx |= txr->me << 4; + TXD->mss_l4len_idx = htole32(mss_l4len_idx); + + TXD->seqnum_seed = htole32(0); + *cmd_type_len |= E1000_ADVTXD_DCMD_TSE; + *olinfo_status |= E1000_TXD_POPTS_TXSM << 8; + *olinfo_status |= paylen << E1000_ADVTXD_PAYLEN_SHIFT; + + return (1); +} + +/********************************************************************* + * + * Advanced Context Descriptor setup for VLAN, CSUM or TSO + * + **********************************************************************/ +static int +igb_tx_ctx_setup(struct tx_ring *txr, if_pkt_info_t pi, u32 *cmd_type_len, u32 *olinfo_status) +{ + struct e1000_adv_tx_context_desc *TXD; + struct adapter *adapter = txr->adapter; + u32 vlan_macip_lens, type_tucmd_mlhl; + 
u32 mss_l4len_idx; + mss_l4len_idx = vlan_macip_lens = type_tucmd_mlhl = 0; + int offload = TRUE; + + /* First check if TSO is to be used */ + if (pi->ipi_csum_flags & CSUM_TSO) + return (igb_tso_setup(txr, pi, cmd_type_len, olinfo_status)); + + /* Indicate the whole packet as payload when not doing TSO */ + *olinfo_status |= pi->ipi_len << E1000_ADVTXD_PAYLEN_SHIFT; + + /* Now ready a context descriptor */ + TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[pi->ipi_pidx]; + + /* + ** In advanced descriptors the vlan tag must + ** be placed into the context descriptor. Hence + ** we need to make one even if not doing offloads. + */ + if (pi->ipi_mflags & M_VLANTAG) { + vlan_macip_lens |= (pi->ipi_vtag << E1000_ADVTXD_VLAN_SHIFT); + } else if ((pi->ipi_csum_flags & IGB_CSUM_OFFLOAD) == 0) { + return (0); + } + + /* Set the ether header length */ + vlan_macip_lens |= pi->ipi_ehdrlen << E1000_ADVTXD_MACLEN_SHIFT; + + switch(pi->ipi_etype) { + case ETHERTYPE_IP: + type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4; + break; + case ETHERTYPE_IPV6: + type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6; + break; + default: + offload = FALSE; + break; + } + + vlan_macip_lens |= pi->ipi_ip_hlen; + type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT; + + switch (pi->ipi_ipproto) { + case IPPROTO_TCP: + if (pi->ipi_csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP)) + type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP; + break; + case IPPROTO_UDP: + if (pi->ipi_csum_flags & (CSUM_IP_UDP | CSUM_IP6_UDP)) + type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP; + break; + case IPPROTO_SCTP: + if (pi->ipi_csum_flags & (CSUM_IP_SCTP | CSUM_IP6_SCTP)) + type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP; + break; + default: + offload = FALSE; + break; + } + + if (offload) /* For the TX descriptor setup */ + *olinfo_status |= E1000_TXD_POPTS_TXSM << 8; + + /* 82575 needs the queue index added */ + if (adapter->hw.mac.type == e1000_82575) + mss_l4len_idx = txr->me << 4; + + /* Now copy bits into 
descriptor */ + TXD->vlan_macip_lens = htole32(vlan_macip_lens); + TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); + TXD->seqnum_seed = htole32(0); + TXD->mss_l4len_idx = htole32(mss_l4len_idx); + + return (1); +} + +static int +igb_isc_txd_encap(void *arg, if_pkt_info_t pi) +{ + struct adapter *sc = arg; + if_softc_ctx_t scctx = sc->shared; + struct em_tx_queue *que = &sc->tx_queues[pi->ipi_qsidx]; + struct tx_ring *txr = &que->txr; + int nsegs = pi->ipi_nsegs; + bus_dma_segment_t *segs = pi->ipi_segs; + union e1000_adv_tx_desc *txd = NULL; + int i, j, first, pidx_last; + u32 olinfo_status, cmd_type_len, txd_flags; + qidx_t ntxd; + + pidx_last = olinfo_status = 0; + /* Basic descriptor defines */ + cmd_type_len = (E1000_ADVTXD_DTYP_DATA | + E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT); + + if (pi->ipi_mflags & M_VLANTAG) + cmd_type_len |= E1000_ADVTXD_DCMD_VLE; + + first = i = pi->ipi_pidx; + ntxd = scctx->isc_ntxd[0]; + txd_flags = pi->ipi_flags & IPI_TX_INTR ? E1000_ADVTXD_DCMD_RS : 0; + /* Consume the first descriptor */ + i += igb_tx_ctx_setup(txr, pi, &cmd_type_len, &olinfo_status); + if (i == scctx->isc_ntxd[0]) + i = 0; + + /* 82575 needs the queue index added */ + if (sc->hw.mac.type == e1000_82575) + olinfo_status |= txr->me << 4; + + for (j = 0; j < nsegs; j++) { + bus_size_t seglen; + bus_addr_t segaddr; + + txd = (union e1000_adv_tx_desc *)&txr->tx_base[i]; + seglen = segs[j].ds_len; + segaddr = htole64(segs[j].ds_addr); + + txd->read.buffer_addr = segaddr; + txd->read.cmd_type_len = htole32(E1000_TXD_CMD_IFCS | + cmd_type_len | seglen); + txd->read.olinfo_status = htole32(olinfo_status); + pidx_last = i; + if (++i == scctx->isc_ntxd[0]) { + i = 0; + } + } + if (txd_flags) { + txr->tx_rsq[txr->tx_rs_pidx] = pidx_last; + txr->tx_rs_pidx = (txr->tx_rs_pidx+1) & (ntxd-1); + MPASS(txr->tx_rs_pidx != txr->tx_rs_cidx); + } + + txd->read.cmd_type_len |= htole32(E1000_TXD_CMD_EOP | txd_flags); + pi->ipi_new_pidx = i; + + return (0); +} + +static void 
+igb_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx) +{ + struct adapter *adapter = arg; + struct em_tx_queue *que = &adapter->tx_queues[txqid]; + struct tx_ring *txr = &que->txr; + + E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), pidx); +} + +static int +igb_isc_txd_credits_update(void *arg, uint16_t txqid, bool clear) +{ + struct adapter *adapter = arg; + if_softc_ctx_t scctx = adapter->shared; + struct em_tx_queue *que = &adapter->tx_queues[txqid]; + struct tx_ring *txr = &que->txr; + + qidx_t processed = 0; + int updated; + qidx_t cur, prev, ntxd, rs_cidx; + int32_t delta; + uint8_t status; + + rs_cidx = txr->tx_rs_cidx; + if (rs_cidx == txr->tx_rs_pidx) + return (0); + cur = txr->tx_rsq[rs_cidx]; + status = ((union e1000_adv_tx_desc *)&txr->tx_base[cur])->wb.status; + updated = !!(status & E1000_TXD_STAT_DD); + + if (!clear || !updated) + return (updated); + + prev = txr->tx_cidx_processed; + ntxd = scctx->isc_ntxd[0]; + do { + delta = (int32_t)cur - (int32_t)prev; + MPASS(prev == 0 || delta != 0); + if (delta < 0) + delta += ntxd; + + processed += delta; + prev = cur; + rs_cidx = (rs_cidx + 1) & (ntxd-1); + if (rs_cidx == txr->tx_rs_pidx) + break; + cur = txr->tx_rsq[rs_cidx]; + status = ((union e1000_adv_tx_desc *)&txr->tx_base[cur])->wb.status; + } while ((status & E1000_TXD_STAT_DD)); + + txr->tx_rs_cidx = rs_cidx; + txr->tx_cidx_processed = prev; + return (processed); +} + +static void +igb_isc_rxd_refill(void *arg, if_rxd_update_t iru) +{ + struct adapter *sc = arg; + if_softc_ctx_t scctx = sc->shared; + uint16_t rxqid = iru->iru_qsidx; + struct em_rx_queue *que = &sc->rx_queues[rxqid]; + union e1000_adv_rx_desc *rxd; + struct rx_ring *rxr = &que->rxr; + uint64_t *paddrs; + uint32_t next_pidx, pidx; + uint16_t count; + int i; + + paddrs = iru->iru_paddrs; + pidx = iru->iru_pidx; + count = iru->iru_count; + + for (i = 0, next_pidx = pidx; i < count; i++) { + rxd = (union e1000_adv_rx_desc *)&rxr->rx_base[next_pidx]; + + rxd->read.pkt_addr = 
htole64(paddrs[i]); + if (++next_pidx == scctx->isc_nrxd[0]) + next_pidx = 0; + } +} + +static void +igb_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused, qidx_t pidx) +{ + struct adapter *sc = arg; + struct em_rx_queue *que = &sc->rx_queues[rxqid]; + struct rx_ring *rxr = &que->rxr; + + E1000_WRITE_REG(&sc->hw, E1000_RDT(rxr->me), pidx); +} + +static int +igb_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget) +{ + struct adapter *sc = arg; + if_softc_ctx_t scctx = sc->shared; + struct em_rx_queue *que = &sc->rx_queues[rxqid]; + struct rx_ring *rxr = &que->rxr; + union e1000_adv_rx_desc *rxd; + u32 staterr = 0; + int cnt, i, iter; + + if (budget == 1) { + rxd = (union e1000_adv_rx_desc *)&rxr->rx_base[idx]; + staterr = le32toh(rxd->wb.upper.status_error); + return (staterr & E1000_RXD_STAT_DD); + } + + for (iter = cnt = 0, i = idx; iter < scctx->isc_nrxd[0] && iter <= budget;) { + rxd = (union e1000_adv_rx_desc *)&rxr->rx_base[i]; + staterr = le32toh(rxd->wb.upper.status_error); + + if ((staterr & E1000_RXD_STAT_DD) == 0) + break; + + if (++i == scctx->isc_nrxd[0]) { + i = 0; + } + + if (staterr & E1000_RXD_STAT_EOP) + cnt++; + iter++; + } + return (cnt); +} + +/**************************************************************** + * Routine sends data which has been dma'ed into host memory + * to upper layer. Initialize ri structure. 
+ * + * Returns 0 upon success, errno on failure + ***************************************************************/ + +static int +igb_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri) +{ + struct adapter *adapter = arg; + if_softc_ctx_t scctx = adapter->shared; + struct em_rx_queue *que = &adapter->rx_queues[ri->iri_qsidx]; + struct rx_ring *rxr = &que->rxr; + struct ifnet *ifp = iflib_get_ifp(adapter->ctx); + union e1000_adv_rx_desc *rxd; + + u16 pkt_info, len; + u16 vtag = 0; + u32 ptype; + u32 staterr = 0; + bool eop; + int i = 0; + int cidx = ri->iri_cidx; + + do { + rxd = (union e1000_adv_rx_desc *)&rxr->rx_base[cidx]; + staterr = le32toh(rxd->wb.upper.status_error); + pkt_info = le16toh(rxd->wb.lower.lo_dword.hs_rss.pkt_info); + + MPASS ((staterr & E1000_RXD_STAT_DD) != 0); + + len = le16toh(rxd->wb.upper.length); + ptype = le32toh(rxd->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK; + + ri->iri_len += len; + rxr->rx_bytes += ri->iri_len; + + rxd->wb.upper.status_error = 0; + eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP); + + if (((adapter->hw.mac.type == e1000_i350) || + (adapter->hw.mac.type == e1000_i354)) && + (staterr & E1000_RXDEXT_STATERR_LB)) + vtag = be16toh(rxd->wb.upper.vlan); + else + vtag = le16toh(rxd->wb.upper.vlan); + + /* Make sure bad packets are discarded */ + if (eop && ((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0)) { + adapter->dropped_pkts++; + ++rxr->rx_discarded; + return (EBADMSG); + } + ri->iri_frags[i].irf_flid = 0; + ri->iri_frags[i].irf_idx = cidx; + ri->iri_frags[i].irf_len = len; + + if (++cidx == scctx->isc_nrxd[0]) + cidx = 0; +#ifdef notyet + if (rxr->hdr_split == TRUE) { + ri->iri_frags[i].irf_flid = 1; + ri->iri_frags[i].irf_idx = cidx; + if (++cidx == scctx->isc_nrxd[0]) + cidx = 0; + } +#endif + i++; + } while (!eop); + + rxr->rx_packets++; + + if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) + igb_rx_checksum(staterr, ri, ptype); + + if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 && + (staterr & 
E1000_RXD_STAT_VP) != 0) { + ri->iri_vtag = vtag; + ri->iri_flags |= M_VLANTAG; + } + ri->iri_flowid = + le32toh(rxd->wb.lower.hi_dword.rss); + ri->iri_rsstype = igb_determine_rsstype(pkt_info); + ri->iri_nfrags = i; + + return (0); +} + +/********************************************************************* + * + * Verify that the hardware indicated that the checksum is valid. + * Inform the stack about the status of checksum so that stack + * doesn't spend time verifying the checksum. + * + *********************************************************************/ +static void +igb_rx_checksum(u32 staterr, if_rxd_info_t ri, u32 ptype) +{ + u16 status = (u16)staterr; + u8 errors = (u8) (staterr >> 24); + bool sctp = FALSE; + + /* Ignore Checksum bit is set */ + if (status & E1000_RXD_STAT_IXSM) { + ri->iri_csum_flags = 0; + return; + } + + if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 && + (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0) + sctp = 1; + else + sctp = 0; + + if (status & E1000_RXD_STAT_IPCS) { + /* Did it pass? */ + if (!(errors & E1000_RXD_ERR_IPE)) { + /* IP Checksum Good */ + ri->iri_csum_flags = CSUM_IP_CHECKED; + ri->iri_csum_flags |= CSUM_IP_VALID; + } else + ri->iri_csum_flags = 0; + } + + if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) { + u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); + if (sctp) /* reassign */ + type = CSUM_SCTP_VALID; + /* Did it pass? 
*/ + if (!(errors & E1000_RXD_ERR_TCPE)) { + ri->iri_csum_flags |= type; + if (sctp == 0) + ri->iri_csum_data = htons(0xffff); + } + } + return; +} + +/******************************************************************** + * + * Parse the packet type to determine the appropriate hash + * + ******************************************************************/ +static int +igb_determine_rsstype(u16 pkt_info) +{ + switch (pkt_info & E1000_RXDADV_RSSTYPE_MASK) { + case E1000_RXDADV_RSSTYPE_IPV4_TCP: + return M_HASHTYPE_RSS_TCP_IPV4; + case E1000_RXDADV_RSSTYPE_IPV4: + return M_HASHTYPE_RSS_IPV4; + case E1000_RXDADV_RSSTYPE_IPV6_TCP: + return M_HASHTYPE_RSS_TCP_IPV6; + case E1000_RXDADV_RSSTYPE_IPV6_EX: + return M_HASHTYPE_RSS_IPV6_EX; + case E1000_RXDADV_RSSTYPE_IPV6: + return M_HASHTYPE_RSS_IPV6; + case E1000_RXDADV_RSSTYPE_IPV6_TCP_EX: + return M_HASHTYPE_RSS_TCP_IPV6_EX; + default: + return M_HASHTYPE_OPAQUE; + } +} diff --git a/freebsd/sys/dev/fdt/fdt_common.h b/freebsd/sys/dev/fdt/fdt_common.h index 81ce4bfa..904d3e18 100644 --- a/freebsd/sys/dev/fdt/fdt_common.h +++ b/freebsd/sys/dev/fdt/fdt_common.h @@ -71,12 +71,6 @@ extern vm_paddr_t fdt_immr_pa; extern vm_offset_t fdt_immr_va; extern vm_offset_t fdt_immr_size; -struct fdt_pm_mask_entry { - char *compat; - uint32_t mask; -}; -extern struct fdt_pm_mask_entry fdt_pm_mask_table[]; - #if defined(FDT_DTB_STATIC) extern u_char fdt_static_dtb; #endif diff --git a/freebsd/sys/dev/fdt/simplebus.c b/freebsd/sys/dev/fdt/simplebus.c index d981d065..fb099965 100644 --- a/freebsd/sys/dev/fdt/simplebus.c +++ b/freebsd/sys/dev/fdt/simplebus.c @@ -127,7 +127,7 @@ simplebus_probe(device_t dev) /* * FDT data puts a "simple-bus" compatible string on many things that - * have children but aren't really busses in our world. Without a + * have children but aren't really buses in our world. Without a * ranges property we will fail to attach, so just fail to probe too. 
*/ if (!(ofw_bus_is_compatible(dev, "simple-bus") && diff --git a/freebsd/sys/dev/mmc/bridge.h b/freebsd/sys/dev/mmc/bridge.h index a26c31ec..a780ffae 100644 --- a/freebsd/sys/dev/mmc/bridge.h +++ b/freebsd/sys/dev/mmc/bridge.h @@ -52,7 +52,7 @@ */ #ifndef DEV_MMC_BRIDGE_H -#define DEV_MMC_BRIDGE_H +#define DEV_MMC_BRIDGE_H #include @@ -60,7 +60,7 @@ * This file defines interfaces for the mmc bridge. The names chosen * are similar to or the same as the names used in Linux to allow for * easy porting of what Linux calls mmc host drivers. I use the - * FreeBSD terminology of bridge and bus for consistancy with other + * FreeBSD terminology of bridge and bus for consistency with other * drivers in the system. This file corresponds roughly to the Linux * linux/mmc/host.h file. * @@ -73,10 +73,9 @@ * to be added to the mmcbus file). * * Attached to the mmc bridge is an mmcbus. The mmcbus is described - * in dev/mmc/bus.h. + * in dev/mmc/mmcbus_if.m. */ - /* * mmc_ios is a structure that is used to store the state of the mmc/sd * bus configuration. 
This include the bus' clock speed, its voltage, @@ -90,6 +89,10 @@ enum mmc_vdd { vdd_330, vdd_340, vdd_350, vdd_360 }; +enum mmc_vccq { + vccq_120 = 0, vccq_180, vccq_330 +}; + enum mmc_power_mode { power_off = 0, power_up, power_on }; @@ -106,18 +109,28 @@ enum mmc_bus_width { bus_width_1 = 0, bus_width_4 = 2, bus_width_8 = 3 }; +enum mmc_drv_type { + drv_type_b = 0, drv_type_a, drv_type_c, drv_type_d +}; + enum mmc_bus_timing { - bus_timing_normal = 0, bus_timing_hs + bus_timing_normal = 0, bus_timing_hs, bus_timing_uhs_sdr12, + bus_timing_uhs_sdr25, bus_timing_uhs_sdr50, bus_timing_uhs_ddr50, + bus_timing_uhs_sdr104, bus_timing_mmc_ddr52, bus_timing_mmc_hs200, + bus_timing_mmc_hs400, bus_timing_mmc_hs400es, bus_timing_max = + bus_timing_mmc_hs400es }; struct mmc_ios { uint32_t clock; /* Speed of the clock in Hz to move data */ - enum mmc_vdd vdd; /* Voltage to apply to the power pins/ */ + enum mmc_vdd vdd; /* Voltage to apply to the power pins */ + enum mmc_vccq vccq; /* Voltage to use for signaling */ enum mmc_bus_mode bus_mode; enum mmc_chip_select chip_select; enum mmc_bus_width bus_width; enum mmc_power_mode power_mode; enum mmc_bus_timing timing; + enum mmc_drv_type drv_type; }; enum mmc_card_mode { @@ -130,9 +143,33 @@ struct mmc_host { uint32_t host_ocr; uint32_t ocr; uint32_t caps; -#define MMC_CAP_4_BIT_DATA (1 << 0) /* Can do 4-bit data transfers */ -#define MMC_CAP_8_BIT_DATA (1 << 1) /* Can do 8-bit data transfers */ -#define MMC_CAP_HSPEED (1 << 2) /* Can do High Speed transfers */ +#define MMC_CAP_4_BIT_DATA (1 << 0) /* Can do 4-bit data transfers */ +#define MMC_CAP_8_BIT_DATA (1 << 1) /* Can do 8-bit data transfers */ +#define MMC_CAP_HSPEED (1 << 2) /* Can do High Speed transfers */ +#define MMC_CAP_BOOT_NOACC (1 << 4) /* Cannot access boot partitions */ +#define MMC_CAP_WAIT_WHILE_BUSY (1 << 5) /* Host waits for busy responses */ +#define MMC_CAP_UHS_SDR12 (1 << 6) /* Can do UHS SDR12 */ +#define MMC_CAP_UHS_SDR25 (1 << 7) /* Can do UHS SDR25 
*/ +#define MMC_CAP_UHS_SDR50 (1 << 8) /* Can do UHS SDR50 */ +#define MMC_CAP_UHS_SDR104 (1 << 9) /* Can do UHS SDR104 */ +#define MMC_CAP_UHS_DDR50 (1 << 10) /* Can do UHS DDR50 */ +#define MMC_CAP_MMC_DDR52_120 (1 << 11) /* Can do eMMC DDR52 at 1.2 V */ +#define MMC_CAP_MMC_DDR52_180 (1 << 12) /* Can do eMMC DDR52 at 1.8 V */ +#define MMC_CAP_MMC_DDR52 (MMC_CAP_MMC_DDR52_120 | MMC_CAP_MMC_DDR52_180) +#define MMC_CAP_MMC_HS200_120 (1 << 13) /* Can do eMMC HS200 at 1.2 V */ +#define MMC_CAP_MMC_HS200_180 (1 << 14) /* Can do eMMC HS200 at 1.8 V */ +#define MMC_CAP_MMC_HS200 (MMC_CAP_MMC_HS200_120| MMC_CAP_MMC_HS200_180) +#define MMC_CAP_MMC_HS400_120 (1 << 15) /* Can do eMMC HS400 at 1.2 V */ +#define MMC_CAP_MMC_HS400_180 (1 << 16) /* Can do eMMC HS400 at 1.8 V */ +#define MMC_CAP_MMC_HS400 (MMC_CAP_MMC_HS400_120 | MMC_CAP_MMC_HS400_180) +#define MMC_CAP_MMC_HSX00_120 (MMC_CAP_MMC_HS200_120 | MMC_CAP_MMC_HS400_120) +#define MMC_CAP_MMC_ENH_STROBE (1 << 17) /* Can do eMMC Enhanced Strobe */ +#define MMC_CAP_SIGNALING_120 (1 << 18) /* Can do signaling at 1.2 V */ +#define MMC_CAP_SIGNALING_180 (1 << 19) /* Can do signaling at 1.8 V */ +#define MMC_CAP_SIGNALING_330 (1 << 20) /* Can do signaling at 3.3 V */ +#define MMC_CAP_DRIVER_TYPE_A (1 << 21) /* Can do Driver Type A */ +#define MMC_CAP_DRIVER_TYPE_C (1 << 22) /* Can do Driver Type C */ +#define MMC_CAP_DRIVER_TYPE_D (1 << 23) /* Can do Driver Type D */ enum mmc_card_mode mode; struct mmc_ios ios; /* Current state of the host */ }; @@ -140,4 +177,12 @@ struct mmc_host { extern driver_t mmc_driver; extern devclass_t mmc_devclass; +#define MMC_VERSION 3 + +#define MMC_DECLARE_BRIDGE(name) \ + DRIVER_MODULE(mmc, name, mmc_driver, mmc_devclass, NULL, NULL); \ + MODULE_DEPEND(name, mmc, MMC_VERSION, MMC_VERSION, MMC_VERSION); +#define MMC_DEPEND(name) \ + MODULE_DEPEND(name, mmc, MMC_VERSION, MMC_VERSION, MMC_VERSION); + #endif /* DEV_MMC_BRIDGE_H */ diff --git a/freebsd/sys/dev/mmc/mmc.c b/freebsd/sys/dev/mmc/mmc.c 
index a3232248..ab494804 100644 --- a/freebsd/sys/dev/mmc/mmc.c +++ b/freebsd/sys/dev/mmc/mmc.c @@ -3,6 +3,7 @@ /*- * Copyright (c) 2006 Bernd Walter. All rights reserved. * Copyright (c) 2006 M. Warner Losh. All rights reserved. + * Copyright (c) 2017 Marius Strobl * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -67,24 +68,17 @@ __FBSDID("$FreeBSD$"); #include #include +#include +#include +#include #include #include #include + #include #include -struct mmc_softc { - device_t dev; - struct mtx sc_mtx; - struct intr_config_hook config_intrhook; - device_t owner; - uint32_t last_rca; - int squelched; /* suppress reporting of (expected) errors */ - int log_count; - struct timeval log_time; -}; - -#define LOG_PPS 5 /* Log no more than 5 errors per second. */ +CTASSERT(bus_timing_max <= sizeof(uint32_t) * NBBY); /* * Per-card data @@ -93,7 +87,7 @@ struct mmc_ivars { uint32_t raw_cid[4]; /* Raw bits of the CID */ uint32_t raw_csd[4]; /* Raw bits of the CSD */ uint32_t raw_scr[2]; /* Raw bits of the SCR */ - uint8_t raw_ext_csd[512]; /* Raw bits of the EXT_CSD */ + uint8_t raw_ext_csd[MMC_EXTCSD_SIZE]; /* Raw bits of the EXT_CSD */ uint32_t raw_sd_status[16]; /* Raw bits of the SD_STATUS */ uint16_t rca; enum mmc_card_mode mode; @@ -103,24 +97,26 @@ struct mmc_ivars { struct mmc_sd_status sd_status; /* SD_STATUS decoded */ u_char read_only; /* True when the device is read-only */ u_char bus_width; /* Bus width to use */ - u_char timing; /* Bus timing support */ u_char high_cap; /* High Capacity card (block addressed) */ uint32_t sec_count; /* Card capacity in 512byte blocks */ + uint32_t timings; /* Mask of bus timings supported */ + uint32_t vccq_120; /* Mask of bus timings at VCCQ of 1.2 V */ + uint32_t vccq_180; /* Mask of bus timings at VCCQ of 1.8 V */ uint32_t tran_speed; /* Max speed in normal mode */ uint32_t hs_tran_speed; /* Max speed in high speed mode */ uint32_t 
erase_sector; /* Card native erase sector size */ + uint32_t cmd6_time; /* Generic switch timeout [us] */ char card_id_string[64];/* Formatted CID info (serial, MFG, etc) */ char card_sn_string[16];/* Formatted serial # for disk->d_ident */ }; -#define CMD_RETRIES 3 - -#define CARD_ID_FREQUENCY 400000 /* Spec requires 400kHz max during ID phase. */ +#define CMD_RETRIES 3 static SYSCTL_NODE(_hw, OID_AUTO, mmc, CTLFLAG_RD, NULL, "mmc driver"); static int mmc_debug; -SYSCTL_INT(_hw_mmc, OID_AUTO, debug, CTLFLAG_RWTUN, &mmc_debug, 0, "Debug level"); +SYSCTL_INT(_hw_mmc, OID_AUTO, debug, CTLFLAG_RWTUN, &mmc_debug, 0, + "Debug level"); /* bus entry points */ static int mmc_acquire_bus(device_t busdev, device_t dev); @@ -139,14 +135,14 @@ static int mmc_wait_for_request(device_t brdev, device_t reqdev, static int mmc_write_ivar(device_t bus, device_t child, int which, uintptr_t value); -#define MMC_LOCK(_sc) mtx_lock(&(_sc)->sc_mtx) +#define MMC_LOCK(_sc) mtx_lock(&(_sc)->sc_mtx) #define MMC_UNLOCK(_sc) mtx_unlock(&(_sc)->sc_mtx) -#define MMC_LOCK_INIT(_sc) \ - mtx_init(&_sc->sc_mtx, device_get_nameunit(_sc->dev), \ +#define MMC_LOCK_INIT(_sc) \ + mtx_init(&(_sc)->sc_mtx, device_get_nameunit((_sc)->dev), \ "mmc", MTX_DEF) -#define MMC_LOCK_DESTROY(_sc) mtx_destroy(&_sc->sc_mtx); -#define MMC_ASSERT_LOCKED(_sc) mtx_assert(&_sc->sc_mtx, MA_OWNED); -#define MMC_ASSERT_UNLOCKED(_sc) mtx_assert(&_sc->sc_mtx, MA_NOTOWNED); +#define MMC_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->sc_mtx); +#define MMC_ASSERT_LOCKED(_sc) mtx_assert(&(_sc)->sc_mtx, MA_OWNED); +#define MMC_ASSERT_UNLOCKED(_sc) mtx_assert(&(_sc)->sc_mtx, MA_NOTOWNED); static int mmc_all_send_cid(struct mmc_softc *sc, uint32_t *rawcid); static void mmc_app_decode_scr(uint32_t *raw_scr, struct mmc_scr *scr); @@ -157,7 +153,8 @@ static int mmc_app_sd_status(struct mmc_softc *sc, uint16_t rca, static int mmc_app_send_scr(struct mmc_softc *sc, uint16_t rca, uint32_t *rawscr); static int mmc_calculate_clock(struct mmc_softc 
*sc); -static void mmc_decode_cid_mmc(uint32_t *raw_cid, struct mmc_cid *cid); +static void mmc_decode_cid_mmc(uint32_t *raw_cid, struct mmc_cid *cid, + bool is_4_41p); static void mmc_decode_cid_sd(uint32_t *raw_cid, struct mmc_cid *cid); static void mmc_decode_csd_mmc(uint32_t *raw_csd, struct mmc_csd *csd); static void mmc_decode_csd_sd(uint32_t *raw_csd, struct mmc_csd *csd); @@ -183,25 +180,20 @@ static uint32_t mmc_select_vdd(struct mmc_softc *sc, uint32_t ocr); static int mmc_send_app_op_cond(struct mmc_softc *sc, uint32_t ocr, uint32_t *rocr); static int mmc_send_csd(struct mmc_softc *sc, uint16_t rca, uint32_t *rawcsd); -static int mmc_send_ext_csd(struct mmc_softc *sc, uint8_t *rawextcsd); static int mmc_send_if_cond(struct mmc_softc *sc, uint8_t vhs); static int mmc_send_op_cond(struct mmc_softc *sc, uint32_t ocr, uint32_t *rocr); static int mmc_send_relative_addr(struct mmc_softc *sc, uint32_t *resp); -static int mmc_send_status(struct mmc_softc *sc, uint16_t rca, - uint32_t *status); static int mmc_set_blocklen(struct mmc_softc *sc, uint32_t len); -static int mmc_set_card_bus_width(struct mmc_softc *sc, uint16_t rca, - int width); +static int mmc_set_card_bus_width(struct mmc_softc *sc, struct mmc_ivars *ivar); +static int mmc_set_power_class(struct mmc_softc *sc, struct mmc_ivars *ivar); static int mmc_set_relative_addr(struct mmc_softc *sc, uint16_t resp); -static int mmc_set_timing(struct mmc_softc *sc, int timing); -static int mmc_switch(struct mmc_softc *sc, uint8_t set, uint8_t index, - uint8_t value); +static int mmc_set_timing(struct mmc_softc *sc, struct mmc_ivars *ivar, + enum mmc_bus_timing timing); static int mmc_test_bus_width(struct mmc_softc *sc); -static int mmc_wait_for_app_cmd(struct mmc_softc *sc, uint32_t rca, - struct mmc_command *cmd, int retries); -static int mmc_wait_for_cmd(struct mmc_softc *sc, struct mmc_command *cmd, - int retries); +static uint32_t mmc_timing_to_dtr(struct mmc_ivars *ivar, + enum mmc_bus_timing timing); 
+static const char *mmc_timing_to_string(enum mmc_bus_timing timing); static int mmc_wait_for_command(struct mmc_softc *sc, uint32_t opcode, uint32_t arg, uint32_t flags, uint32_t *resp, int retries); static int mmc_wait_for_req(struct mmc_softc *sc, struct mmc_request *req); @@ -261,7 +253,7 @@ mmc_suspend(device_t dev) err = bus_generic_suspend(dev); if (err) - return (err); + return (err); mmc_power_down(sc); return (0); } @@ -280,8 +272,8 @@ mmc_acquire_bus(device_t busdev, device_t dev) { struct mmc_softc *sc; struct mmc_ivars *ivar; - int err; - int rca; + int err, rca; + enum mmc_bus_timing timing; err = MMCBR_ACQUIRE_HOST(device_get_parent(busdev), busdev); if (err) @@ -300,19 +292,47 @@ mmc_acquire_bus(device_t busdev, device_t dev) * unselect unless the bus code itself wants the mmc * bus, and constantly reselecting causes problems. */ - rca = mmc_get_rca(dev); + ivar = device_get_ivars(dev); + rca = ivar->rca; if (sc->last_rca != rca) { - mmc_select_card(sc, rca); + if (mmc_select_card(sc, rca) != MMC_ERR_NONE) { + device_printf(sc->dev, "Card at relative " + "address %d failed to select.\n", rca); + return (ENXIO); + } sc->last_rca = rca; + timing = mmcbr_get_timing(busdev); /* Prepare bus width for the new card. */ - ivar = device_get_ivars(dev); if (bootverbose || mmc_debug) { device_printf(busdev, - "setting bus width to %d bits\n", + "setting bus width to %d bits %s timing\n", (ivar->bus_width == bus_width_4) ? 4 : - (ivar->bus_width == bus_width_8) ? 8 : 1); + (ivar->bus_width == bus_width_8) ? 
8 : 1, + mmc_timing_to_string(timing)); + } + if (mmc_set_card_bus_width(sc, ivar) != MMC_ERR_NONE) { + device_printf(sc->dev, "Card at relative " + "address %d failed to set bus width.\n", + rca); + return (ENXIO); + } + if (isset(&ivar->vccq_120, timing)) + mmcbr_set_vccq(busdev, vccq_120); + else if (isset(&ivar->vccq_180, timing)) + mmcbr_set_vccq(busdev, vccq_180); + else + mmcbr_set_vccq(busdev, vccq_330); + if (mmcbr_switch_vccq(busdev) != 0) { + device_printf(sc->dev, "Failed to set VCCQ " + "for card at relative address %d.\n", rca); + return (ENXIO); + } + if (mmc_set_power_class(sc, ivar) != MMC_ERR_NONE) { + device_printf(sc->dev, "Card at relative " + "address %d failed to set power class.\n", + rca); + return (ENXIO); } - mmc_set_card_bus_width(sc, rca, ivar->bus_width); mmcbr_set_bus_width(busdev, ivar->bus_width); mmcbr_update_ios(busdev); } @@ -409,81 +429,14 @@ mmc_wait_for_req(struct mmc_softc *sc, struct mmc_request *req) } static int -mmc_wait_for_request(device_t brdev, device_t reqdev, struct mmc_request *req) +mmc_wait_for_request(device_t brdev, device_t reqdev __unused, + struct mmc_request *req) { struct mmc_softc *sc = device_get_softc(brdev); return (mmc_wait_for_req(sc, req)); } -static int -mmc_wait_for_cmd(struct mmc_softc *sc, struct mmc_command *cmd, int retries) -{ - struct mmc_request mreq; - int err; - - do { - memset(&mreq, 0, sizeof(mreq)); - memset(cmd->resp, 0, sizeof(cmd->resp)); - cmd->retries = 0; /* Retries done here, not in hardware. 
*/ - cmd->mrq = &mreq; - mreq.cmd = cmd; - if (mmc_wait_for_req(sc, &mreq) != 0) - err = MMC_ERR_FAILED; - else - err = cmd->error; - } while (err != MMC_ERR_NONE && retries-- > 0); - - if (err != MMC_ERR_NONE && sc->squelched == 0) { - if (ppsratecheck(&sc->log_time, &sc->log_count, LOG_PPS)) { - device_printf(sc->dev, "CMD%d failed, RESULT: %d\n", - cmd->opcode, err); - } - } - - return (err); -} - -static int -mmc_wait_for_app_cmd(struct mmc_softc *sc, uint32_t rca, - struct mmc_command *cmd, int retries) -{ - struct mmc_command appcmd; - int err; - - /* Squelch error reporting at lower levels, we report below. */ - sc->squelched++; - do { - memset(&appcmd, 0, sizeof(appcmd)); - appcmd.opcode = MMC_APP_CMD; - appcmd.arg = rca << 16; - appcmd.flags = MMC_RSP_R1 | MMC_CMD_AC; - appcmd.data = NULL; - if (mmc_wait_for_cmd(sc, &appcmd, 0) != 0) - err = MMC_ERR_FAILED; - else - err = appcmd.error; - if (err == MMC_ERR_NONE) { - if (!(appcmd.resp[0] & R1_APP_CMD)) - err = MMC_ERR_FAILED; - else if (mmc_wait_for_cmd(sc, cmd, 0) != 0) - err = MMC_ERR_FAILED; - else - err = cmd->error; - } - } while (err != MMC_ERR_NONE && retries-- > 0); - sc->squelched--; - - if (err != MMC_ERR_NONE && sc->squelched == 0) { - if (ppsratecheck(&sc->log_time, &sc->log_count, LOG_PPS)) { - device_printf(sc->dev, "ACMD%d failed, RESULT: %d\n", - cmd->opcode, err); - } - } - - return (err); -} - static int mmc_wait_for_command(struct mmc_softc *sc, uint32_t opcode, uint32_t arg, uint32_t flags, uint32_t *resp, int retries) @@ -496,7 +449,7 @@ mmc_wait_for_command(struct mmc_softc *sc, uint32_t opcode, cmd.arg = arg; cmd.flags = flags; cmd.data = NULL; - err = mmc_wait_for_cmd(sc, &cmd, retries); + err = mmc_wait_for_cmd(sc->dev, sc->dev, &cmd, retries); if (err) return (err); if (resp) { @@ -524,7 +477,7 @@ mmc_idle_cards(struct mmc_softc *sc) cmd.arg = 0; cmd.flags = MMC_RSP_NONE | MMC_CMD_BC; cmd.data = NULL; - mmc_wait_for_cmd(sc, &cmd, CMD_RETRIES); + mmc_wait_for_cmd(sc->dev, sc->dev, 
&cmd, CMD_RETRIES); mmc_ms_delay(1); mmcbr_set_chip_select(dev, cs_dontcare); @@ -545,7 +498,8 @@ mmc_send_app_op_cond(struct mmc_softc *sc, uint32_t ocr, uint32_t *rocr) cmd.data = NULL; for (i = 0; i < 1000; i++) { - err = mmc_wait_for_app_cmd(sc, 0, &cmd, CMD_RETRIES); + err = mmc_wait_for_app_cmd(sc->dev, sc->dev, 0, &cmd, + CMD_RETRIES); if (err != MMC_ERR_NONE) break; if ((cmd.resp[0] & MMC_OCR_CARD_BUSY) || @@ -572,7 +526,7 @@ mmc_send_op_cond(struct mmc_softc *sc, uint32_t ocr, uint32_t *rocr) cmd.data = NULL; for (i = 0; i < 1000; i++) { - err = mmc_wait_for_cmd(sc, &cmd, CMD_RETRIES); + err = mmc_wait_for_cmd(sc->dev, sc->dev, &cmd, CMD_RETRIES); if (err != MMC_ERR_NONE) break; if ((cmd.resp[0] & MMC_OCR_CARD_BUSY) || @@ -598,7 +552,7 @@ mmc_send_if_cond(struct mmc_softc *sc, uint8_t vhs) cmd.flags = MMC_RSP_R7 | MMC_CMD_BCR; cmd.data = NULL; - err = mmc_wait_for_cmd(sc, &cmd, CMD_RETRIES); + err = mmc_wait_for_cmd(sc->dev, sc->dev, &cmd, CMD_RETRIES); return (err); } @@ -606,6 +560,7 @@ static void mmc_power_up(struct mmc_softc *sc) { device_t dev; + enum mmc_vccq vccq; dev = sc->dev; mmcbr_set_vdd(dev, mmc_highest_voltage(mmcbr_get_host_ocr(dev))); @@ -615,9 +570,14 @@ mmc_power_up(struct mmc_softc *sc) mmcbr_set_power_mode(dev, power_up); mmcbr_set_clock(dev, 0); mmcbr_update_ios(dev); + for (vccq = vccq_330; ; vccq--) { + mmcbr_set_vccq(dev, vccq); + if (mmcbr_switch_vccq(dev) == 0 || vccq == vccq_120) + break; + } mmc_ms_delay(1); - mmcbr_set_clock(dev, CARD_ID_FREQUENCY); + mmcbr_set_clock(dev, SD_MMC_CARD_ID_FREQUENCY); mmcbr_set_timing(dev, bus_timing_normal); mmcbr_set_power_mode(dev, power_on); mmcbr_update_ios(dev); @@ -648,24 +608,6 @@ mmc_select_card(struct mmc_softc *sc, uint16_t rca) flags, NULL, CMD_RETRIES)); } -static int -mmc_switch(struct mmc_softc *sc, uint8_t set, uint8_t index, uint8_t value) -{ - struct mmc_command cmd; - int err; - - memset(&cmd, 0, sizeof(cmd)); - cmd.opcode = MMC_SWITCH_FUNC; - cmd.arg = (MMC_SWITCH_FUNC_WR << 
24) | - (index << 16) | - (value << 8) | - set; - cmd.flags = MMC_RSP_R1B | MMC_CMD_AC; - cmd.data = NULL; - err = mmc_wait_for_cmd(sc, &cmd, CMD_RETRIES); - return (err); -} - static int mmc_sd_switch(struct mmc_softc *sc, uint8_t mode, uint8_t grp, uint8_t value, uint8_t *res) @@ -690,12 +632,12 @@ mmc_sd_switch(struct mmc_softc *sc, uint8_t mode, uint8_t grp, uint8_t value, data.len = 64; data.flags = MMC_DATA_READ; - err = mmc_wait_for_cmd(sc, &cmd, CMD_RETRIES); + err = mmc_wait_for_cmd(sc->dev, sc->dev, &cmd, CMD_RETRIES); return (err); } static int -mmc_set_card_bus_width(struct mmc_softc *sc, uint16_t rca, int width) +mmc_set_card_bus_width(struct mmc_softc *sc, struct mmc_ivars *ivar) { struct mmc_command cmd; int err; @@ -706,13 +648,14 @@ mmc_set_card_bus_width(struct mmc_softc *sc, uint16_t rca, int width) cmd.opcode = ACMD_SET_CLR_CARD_DETECT; cmd.flags = MMC_RSP_R1 | MMC_CMD_AC; cmd.arg = SD_CLR_CARD_DETECT; - err = mmc_wait_for_app_cmd(sc, rca, &cmd, CMD_RETRIES); + err = mmc_wait_for_app_cmd(sc->dev, sc->dev, ivar->rca, &cmd, + CMD_RETRIES); if (err != 0) return (err); memset(&cmd, 0, sizeof(cmd)); cmd.opcode = ACMD_SET_BUS_WIDTH; cmd.flags = MMC_RSP_R1 | MMC_CMD_AC; - switch (width) { + switch (ivar->bus_width) { case bus_width_1: cmd.arg = SD_BUS_WIDTH_1; break; @@ -722,64 +665,196 @@ mmc_set_card_bus_width(struct mmc_softc *sc, uint16_t rca, int width) default: return (MMC_ERR_INVALID); } - err = mmc_wait_for_app_cmd(sc, rca, &cmd, CMD_RETRIES); + err = mmc_wait_for_app_cmd(sc->dev, sc->dev, ivar->rca, &cmd, + CMD_RETRIES); } else { - switch (width) { + switch (ivar->bus_width) { case bus_width_1: value = EXT_CSD_BUS_WIDTH_1; break; case bus_width_4: - value = EXT_CSD_BUS_WIDTH_4; + switch (mmcbr_get_timing(sc->dev)) { + case bus_timing_mmc_ddr52: + case bus_timing_mmc_hs200: + case bus_timing_mmc_hs400: + case bus_timing_mmc_hs400es: + value = EXT_CSD_BUS_WIDTH_4_DDR; + break; + default: + value = EXT_CSD_BUS_WIDTH_4; + break; + } break; case 
bus_width_8: - value = EXT_CSD_BUS_WIDTH_8; + switch (mmcbr_get_timing(sc->dev)) { + case bus_timing_mmc_ddr52: + case bus_timing_mmc_hs200: + case bus_timing_mmc_hs400: + case bus_timing_mmc_hs400es: + value = EXT_CSD_BUS_WIDTH_8_DDR; + break; + default: + value = EXT_CSD_BUS_WIDTH_8; + break; + } break; default: return (MMC_ERR_INVALID); } - err = mmc_switch(sc, EXT_CSD_CMD_SET_NORMAL, EXT_CSD_BUS_WIDTH, - value); + err = mmc_switch(sc->dev, sc->dev, ivar->rca, + EXT_CSD_CMD_SET_NORMAL, EXT_CSD_BUS_WIDTH, value, + ivar->cmd6_time, true); } return (err); } static int -mmc_set_timing(struct mmc_softc *sc, int timing) +mmc_set_power_class(struct mmc_softc *sc, struct mmc_ivars *ivar) { - int err; - uint8_t value; - u_char switch_res[64]; + device_t dev; + const uint8_t *ext_csd; + uint32_t clock; + uint8_t value; - switch (timing) { - case bus_timing_normal: - value = 0; + dev = sc->dev; + if (mmcbr_get_mode(dev) != mode_mmc || ivar->csd.spec_vers < 4) + return (MMC_ERR_NONE); + + value = 0; + ext_csd = ivar->raw_ext_csd; + clock = mmcbr_get_clock(dev); + switch (1 << mmcbr_get_vdd(dev)) { + case MMC_OCR_LOW_VOLTAGE: + if (clock <= MMC_TYPE_HS_26_MAX) + value = ext_csd[EXT_CSD_PWR_CL_26_195]; + else if (clock <= MMC_TYPE_HS_52_MAX) { + if (mmcbr_get_timing(dev) >= bus_timing_mmc_ddr52 && + ivar->bus_width >= bus_width_4) + value = ext_csd[EXT_CSD_PWR_CL_52_195_DDR]; + else + value = ext_csd[EXT_CSD_PWR_CL_52_195]; + } else if (clock <= MMC_TYPE_HS200_HS400ES_MAX) + value = ext_csd[EXT_CSD_PWR_CL_200_195]; break; - case bus_timing_hs: - value = 1; + case MMC_OCR_270_280: + case MMC_OCR_280_290: + case MMC_OCR_290_300: + case MMC_OCR_300_310: + case MMC_OCR_310_320: + case MMC_OCR_320_330: + case MMC_OCR_330_340: + case MMC_OCR_340_350: + case MMC_OCR_350_360: + if (clock <= MMC_TYPE_HS_26_MAX) + value = ext_csd[EXT_CSD_PWR_CL_26_360]; + else if (clock <= MMC_TYPE_HS_52_MAX) { + if (mmcbr_get_timing(dev) == bus_timing_mmc_ddr52 && + ivar->bus_width >= bus_width_4) + 
value = ext_csd[EXT_CSD_PWR_CL_52_360_DDR]; + else + value = ext_csd[EXT_CSD_PWR_CL_52_360]; + } else if (clock <= MMC_TYPE_HS200_HS400ES_MAX) { + if (ivar->bus_width == bus_width_8) + value = ext_csd[EXT_CSD_PWR_CL_200_360_DDR]; + else + value = ext_csd[EXT_CSD_PWR_CL_200_360]; + } break; default: + device_printf(dev, "No power class support for VDD 0x%x\n", + 1 << mmcbr_get_vdd(dev)); return (MMC_ERR_INVALID); } - if (mmcbr_get_mode(sc->dev) == mode_sd) + + if (ivar->bus_width == bus_width_8) + value = (value & EXT_CSD_POWER_CLASS_8BIT_MASK) >> + EXT_CSD_POWER_CLASS_8BIT_SHIFT; + else + value = (value & EXT_CSD_POWER_CLASS_4BIT_MASK) >> + EXT_CSD_POWER_CLASS_4BIT_SHIFT; + + if (value == 0) + return (MMC_ERR_NONE); + + return (mmc_switch(dev, dev, ivar->rca, EXT_CSD_CMD_SET_NORMAL, + EXT_CSD_POWER_CLASS, value, ivar->cmd6_time, true)); +} + +static int +mmc_set_timing(struct mmc_softc *sc, struct mmc_ivars *ivar, + enum mmc_bus_timing timing) +{ + u_char switch_res[64]; + uint8_t value; + int err; + + if (mmcbr_get_mode(sc->dev) == mode_sd) { + switch (timing) { + case bus_timing_normal: + value = SD_SWITCH_NORMAL_MODE; + break; + case bus_timing_hs: + value = SD_SWITCH_HS_MODE; + break; + default: + return (MMC_ERR_INVALID); + } err = mmc_sd_switch(sc, SD_SWITCH_MODE_SET, SD_SWITCH_GROUP1, value, switch_res); - else - err = mmc_switch(sc, EXT_CSD_CMD_SET_NORMAL, - EXT_CSD_HS_TIMING, value); + if (err != MMC_ERR_NONE) + return (err); + if ((switch_res[16] & 0xf) != value) + return (MMC_ERR_FAILED); + mmcbr_set_timing(sc->dev, timing); + mmcbr_update_ios(sc->dev); + } else { + switch (timing) { + case bus_timing_normal: + value = EXT_CSD_HS_TIMING_BC; + break; + case bus_timing_hs: + case bus_timing_mmc_ddr52: + value = EXT_CSD_HS_TIMING_HS; + break; + default: + return (MMC_ERR_INVALID); + } + err = mmc_switch(sc->dev, sc->dev, ivar->rca, + EXT_CSD_CMD_SET_NORMAL, EXT_CSD_HS_TIMING, value, + ivar->cmd6_time, false); + if (err != MMC_ERR_NONE) + return (err); + 
mmcbr_set_timing(sc->dev, timing); + mmcbr_update_ios(sc->dev); + err = mmc_switch_status(sc->dev, sc->dev, ivar->rca, + ivar->cmd6_time); + } return (err); } +static const uint8_t p8[8] = { + 0x55, 0xAA, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; + +static const uint8_t p8ok[8] = { + 0xAA, 0x55, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; + +static const uint8_t p4[4] = { + 0x5A, 0x00, 0x00, 0x00 +}; + +static const uint8_t p4ok[4] = { + 0xA5, 0x00, 0x00, 0x00 +}; + static int mmc_test_bus_width(struct mmc_softc *sc) { struct mmc_command cmd; struct mmc_data data; - int err; uint8_t buf[8]; - uint8_t p8[8] = { 0x55, 0xAA, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; - uint8_t p8ok[8] = { 0xAA, 0x55, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; - uint8_t p4[4] = { 0x5A, 0x00, 0x00, 0x00, }; - uint8_t p4ok[4] = { 0xA5, 0x00, 0x00, 0x00, }; + int err; if (mmcbr_get_caps(sc->dev) & MMC_CAP_8_BIT_DATA) { mmcbr_set_bus_width(sc->dev, bus_width_8); @@ -793,10 +868,10 @@ mmc_test_bus_width(struct mmc_softc *sc) cmd.flags = MMC_RSP_R1 | MMC_CMD_ADTC; cmd.data = &data; - data.data = p8; + data.data = __DECONST(void *, p8); data.len = 8; data.flags = MMC_DATA_WRITE; - mmc_wait_for_cmd(sc, &cmd, 0); + mmc_wait_for_cmd(sc->dev, sc->dev, &cmd, 0); memset(&cmd, 0, sizeof(cmd)); memset(&data, 0, sizeof(data)); @@ -808,7 +883,7 @@ mmc_test_bus_width(struct mmc_softc *sc) data.data = buf; data.len = 8; data.flags = MMC_DATA_READ; - err = mmc_wait_for_cmd(sc, &cmd, 0); + err = mmc_wait_for_cmd(sc->dev, sc->dev, &cmd, 0); sc->squelched--; mmcbr_set_bus_width(sc->dev, bus_width_1); @@ -830,10 +905,10 @@ mmc_test_bus_width(struct mmc_softc *sc) cmd.flags = MMC_RSP_R1 | MMC_CMD_ADTC; cmd.data = &data; - data.data = p4; + data.data = __DECONST(void *, p4); data.len = 4; data.flags = MMC_DATA_WRITE; - mmc_wait_for_cmd(sc, &cmd, 0); + mmc_wait_for_cmd(sc->dev, sc->dev, &cmd, 0); memset(&cmd, 0, sizeof(cmd)); memset(&data, 0, sizeof(data)); @@ -845,7 +920,7 @@ mmc_test_bus_width(struct mmc_softc *sc) data.data = 
buf; data.len = 4; data.flags = MMC_DATA_READ; - err = mmc_wait_for_cmd(sc, &cmd, 0); + err = mmc_wait_for_cmd(sc->dev, sc->dev, &cmd, 0); sc->squelched--; mmcbr_set_bus_width(sc->dev, bus_width_1); @@ -863,6 +938,7 @@ mmc_get_bits(uint32_t *bits, int bit_len, int start, int size) const int i = (bit_len / 32) - (start / 32) - 1; const int shift = start & 31; uint32_t retval = bits[i] >> shift; + if (size + shift > 32) retval |= bits[i - 1] << (32 - shift); return (retval & ((1llu << size) - 1)); @@ -887,7 +963,7 @@ mmc_decode_cid_sd(uint32_t *raw_cid, struct mmc_cid *cid) } static void -mmc_decode_cid_mmc(uint32_t *raw_cid, struct mmc_cid *cid) +mmc_decode_cid_mmc(uint32_t *raw_cid, struct mmc_cid *cid, bool is_4_41p) { int i; @@ -901,7 +977,11 @@ mmc_decode_cid_mmc(uint32_t *raw_cid, struct mmc_cid *cid) cid->prv = mmc_get_bits(raw_cid, 128, 48, 8); cid->psn = mmc_get_bits(raw_cid, 128, 16, 32); cid->mdt_month = mmc_get_bits(raw_cid, 128, 12, 4); - cid->mdt_year = mmc_get_bits(raw_cid, 128, 8, 4) + 1997; + cid->mdt_year = mmc_get_bits(raw_cid, 128, 8, 4); + if (is_4_41p) + cid->mdt_year += 2013; + else + cid->mdt_year += 1997; } static void @@ -982,10 +1062,14 @@ mmc_decode_csd_sd(uint32_t *raw_csd, struct mmc_csd *csd) csd->write_blk_misalign = mmc_get_bits(raw_csd, 128, 78, 1); csd->read_blk_misalign = mmc_get_bits(raw_csd, 128, 77, 1); csd->dsr_imp = mmc_get_bits(raw_csd, 128, 76, 1); - csd->vdd_r_curr_min = cur_min[mmc_get_bits(raw_csd, 128, 59, 3)]; - csd->vdd_r_curr_max = cur_max[mmc_get_bits(raw_csd, 128, 56, 3)]; - csd->vdd_w_curr_min = cur_min[mmc_get_bits(raw_csd, 128, 53, 3)]; - csd->vdd_w_curr_max = cur_max[mmc_get_bits(raw_csd, 128, 50, 3)]; + csd->vdd_r_curr_min = + cur_min[mmc_get_bits(raw_csd, 128, 59, 3)]; + csd->vdd_r_curr_max = + cur_max[mmc_get_bits(raw_csd, 128, 56, 3)]; + csd->vdd_w_curr_min = + cur_min[mmc_get_bits(raw_csd, 128, 53, 3)]; + csd->vdd_w_curr_max = + cur_max[mmc_get_bits(raw_csd, 128, 50, 3)]; m = mmc_get_bits(raw_csd, 128, 62, 
12); e = mmc_get_bits(raw_csd, 128, 47, 3); csd->capacity = ((1 + m) << (e + 2)) * csd->read_bl_len; @@ -1010,8 +1094,8 @@ mmc_decode_csd_sd(uint32_t *raw_csd, struct mmc_csd *csd) csd->write_blk_misalign = mmc_get_bits(raw_csd, 128, 78, 1); csd->read_blk_misalign = mmc_get_bits(raw_csd, 128, 77, 1); csd->dsr_imp = mmc_get_bits(raw_csd, 128, 76, 1); - csd->capacity = ((uint64_t)mmc_get_bits(raw_csd, 128, 48, 22) + 1) * - 512 * 1024; + csd->capacity = ((uint64_t)mmc_get_bits(raw_csd, 128, 48, 22) + + 1) * 512 * 1024; csd->erase_blk_en = mmc_get_bits(raw_csd, 128, 46, 1); csd->erase_sector = mmc_get_bits(raw_csd, 128, 39, 7) + 1; csd->wp_grp_size = mmc_get_bits(raw_csd, 128, 32, 7); @@ -1109,7 +1193,7 @@ mmc_all_send_cid(struct mmc_softc *sc, uint32_t *rawcid) cmd.arg = 0; cmd.flags = MMC_RSP_R2 | MMC_CMD_BCR; cmd.data = NULL; - err = mmc_wait_for_cmd(sc, &cmd, CMD_RETRIES); + err = mmc_wait_for_cmd(sc->dev, sc->dev, &cmd, CMD_RETRIES); memcpy(rawcid, cmd.resp, 4 * sizeof(uint32_t)); return (err); } @@ -1125,7 +1209,7 @@ mmc_send_csd(struct mmc_softc *sc, uint16_t rca, uint32_t *rawcsd) cmd.arg = rca << 16; cmd.flags = MMC_RSP_R2 | MMC_CMD_BCR; cmd.data = NULL; - err = mmc_wait_for_cmd(sc, &cmd, CMD_RETRIES); + err = mmc_wait_for_cmd(sc->dev, sc->dev, &cmd, CMD_RETRIES); memcpy(rawcsd, cmd.resp, 4 * sizeof(uint32_t)); return (err); } @@ -1150,42 +1234,18 @@ mmc_app_send_scr(struct mmc_softc *sc, uint16_t rca, uint32_t *rawscr) data.len = 8; data.flags = MMC_DATA_READ; - err = mmc_wait_for_app_cmd(sc, rca, &cmd, CMD_RETRIES); + err = mmc_wait_for_app_cmd(sc->dev, sc->dev, rca, &cmd, CMD_RETRIES); rawscr[0] = be32toh(rawscr[0]); rawscr[1] = be32toh(rawscr[1]); return (err); } -static int -mmc_send_ext_csd(struct mmc_softc *sc, uint8_t *rawextcsd) -{ - int err; - struct mmc_command cmd; - struct mmc_data data; - - memset(&cmd, 0, sizeof(cmd)); - memset(&data, 0, sizeof(data)); - - memset(rawextcsd, 0, 512); - cmd.opcode = MMC_SEND_EXT_CSD; - cmd.flags = MMC_RSP_R1 | 
MMC_CMD_ADTC; - cmd.arg = 0; - cmd.data = &data; - - data.data = rawextcsd; - data.len = 512; - data.flags = MMC_DATA_READ; - - err = mmc_wait_for_cmd(sc, &cmd, CMD_RETRIES); - return (err); -} - static int mmc_app_sd_status(struct mmc_softc *sc, uint16_t rca, uint32_t *rawsdstatus) { - int err, i; struct mmc_command cmd; struct mmc_data data; + int err, i; memset(&cmd, 0, sizeof(cmd)); memset(&data, 0, sizeof(data)); @@ -1200,7 +1260,7 @@ mmc_app_sd_status(struct mmc_softc *sc, uint16_t rca, uint32_t *rawsdstatus) data.len = 64; data.flags = MMC_DATA_READ; - err = mmc_wait_for_app_cmd(sc, rca, &cmd, CMD_RETRIES); + err = mmc_wait_for_app_cmd(sc->dev, sc->dev, rca, &cmd, CMD_RETRIES); for (i = 0; i < 16; i++) rawsdstatus[i] = be32toh(rawsdstatus[i]); return (err); @@ -1217,7 +1277,7 @@ mmc_set_relative_addr(struct mmc_softc *sc, uint16_t resp) cmd.arg = resp << 16; cmd.flags = MMC_RSP_R6 | MMC_CMD_BCR; cmd.data = NULL; - err = mmc_wait_for_cmd(sc, &cmd, CMD_RETRIES); + err = mmc_wait_for_cmd(sc->dev, sc->dev, &cmd, CMD_RETRIES); return (err); } @@ -1232,54 +1292,102 @@ mmc_send_relative_addr(struct mmc_softc *sc, uint32_t *resp) cmd.arg = 0; cmd.flags = MMC_RSP_R6 | MMC_CMD_BCR; cmd.data = NULL; - err = mmc_wait_for_cmd(sc, &cmd, CMD_RETRIES); + err = mmc_wait_for_cmd(sc->dev, sc->dev, &cmd, CMD_RETRIES); *resp = cmd.resp[0]; return (err); } static int -mmc_send_status(struct mmc_softc *sc, uint16_t rca, uint32_t *status) +mmc_set_blocklen(struct mmc_softc *sc, uint32_t len) { struct mmc_command cmd; int err; memset(&cmd, 0, sizeof(cmd)); - cmd.opcode = MMC_SEND_STATUS; - cmd.arg = rca << 16; + cmd.opcode = MMC_SET_BLOCKLEN; + cmd.arg = len; cmd.flags = MMC_RSP_R1 | MMC_CMD_AC; cmd.data = NULL; - err = mmc_wait_for_cmd(sc, &cmd, CMD_RETRIES); - *status = cmd.resp[0]; + err = mmc_wait_for_cmd(sc->dev, sc->dev, &cmd, CMD_RETRIES); return (err); } -static int -mmc_set_blocklen(struct mmc_softc *sc, uint32_t len) +static uint32_t +mmc_timing_to_dtr(struct mmc_ivars 
*ivar, enum mmc_bus_timing timing) { - struct mmc_command cmd; - int err; - memset(&cmd, 0, sizeof(cmd)); - cmd.opcode = MMC_SET_BLOCKLEN; - cmd.arg = len; - cmd.flags = MMC_RSP_R1 | MMC_CMD_AC; - cmd.data = NULL; - err = mmc_wait_for_cmd(sc, &cmd, CMD_RETRIES); - return (err); + switch (timing) { + case bus_timing_normal: + return (ivar->tran_speed); + case bus_timing_hs: + return (ivar->hs_tran_speed); + case bus_timing_uhs_sdr12: + return (SD_SDR12_MAX); + case bus_timing_uhs_sdr25: + return (SD_SDR25_MAX); + case bus_timing_uhs_ddr50: + return (SD_DDR50_MAX); + case bus_timing_uhs_sdr50: + return (SD_SDR50_MAX); + case bus_timing_uhs_sdr104: + return (SD_SDR104_MAX); + case bus_timing_mmc_ddr52: + return (MMC_TYPE_DDR52_MAX); + case bus_timing_mmc_hs200: + case bus_timing_mmc_hs400: + case bus_timing_mmc_hs400es: + return (MMC_TYPE_HS200_HS400ES_MAX); + } + return (0); +} + +static const char * +mmc_timing_to_string(enum mmc_bus_timing timing) +{ + + switch (timing) { + case bus_timing_normal: + return ("normal speed"); + case bus_timing_hs: + return ("high speed"); + case bus_timing_uhs_sdr12: + case bus_timing_uhs_sdr25: + case bus_timing_uhs_sdr50: + case bus_timing_uhs_sdr104: + return ("single data rate"); + case bus_timing_uhs_ddr50: + case bus_timing_mmc_ddr52: + return ("dual data rate"); + case bus_timing_mmc_hs200: + return ("HS200"); + case bus_timing_mmc_hs400: + return ("HS400"); + case bus_timing_mmc_hs400es: + return ("HS400 with enhanced strobe"); + } + return (""); } static void mmc_log_card(device_t dev, struct mmc_ivars *ivar, int newcard) { + enum mmc_bus_timing max_timing, timing; + device_printf(dev, "Card at relative address 0x%04x%s:\n", ivar->rca, newcard ? 
" added" : ""); device_printf(dev, " card: %s\n", ivar->card_id_string); - device_printf(dev, " bus: %ubit, %uMHz%s\n", + max_timing = bus_timing_normal; + for (timing = bus_timing_max; timing > bus_timing_normal; timing--) { + if (isset(&ivar->timings, timing)) { + max_timing = timing; + break; + } + } + device_printf(dev, " bus: %ubit, %uMHz (%s timing)\n", (ivar->bus_width == bus_width_1 ? 1 : (ivar->bus_width == bus_width_4 ? 4 : 8)), - (ivar->timing == bus_timing_hs ? - ivar->hs_tran_speed : ivar->tran_speed) / 1000000, - ivar->timing == bus_timing_hs ? ", high speed timing" : ""); + mmc_timing_to_dtr(ivar, timing) / 1000000, + mmc_timing_to_string(timing)); device_printf(dev, " memory: %u blocks, erase sector %u blocks%s\n", ivar->sec_count, ivar->erase_sector, ivar->read_only ? ", read-only" : ""); @@ -1288,14 +1396,16 @@ mmc_log_card(device_t dev, struct mmc_ivars *ivar, int newcard) static void mmc_discover_cards(struct mmc_softc *sc) { + u_char switch_res[64]; + uint32_t raw_cid[4]; struct mmc_ivars *ivar = NULL; device_t *devlist; - int err, i, devcount, newcard; - uint32_t raw_cid[4], resp, sec_count, status; device_t child; + int devcount, err, host_caps, i, newcard; + uint32_t resp, sec_count, status; uint16_t rca = 2; - u_char switch_res[64]; + host_caps = mmcbr_get_caps(sc->dev); if (bootverbose || mmc_debug) device_printf(sc->dev, "Probing cards\n"); while (1) { @@ -1309,18 +1419,21 @@ mmc_discover_cards(struct mmc_softc *sc) break; } newcard = 1; - if ((err = device_get_children(sc->dev, &devlist, &devcount)) != 0) + if ((err = device_get_children(sc->dev, &devlist, + &devcount)) != 0) return; for (i = 0; i < devcount; i++) { ivar = device_get_ivars(devlist[i]); - if (memcmp(ivar->raw_cid, raw_cid, sizeof(raw_cid)) == 0) { + if (memcmp(ivar->raw_cid, raw_cid, sizeof(raw_cid)) == + 0) { newcard = 0; break; } } free(devlist, M_TEMP); if (bootverbose || mmc_debug) { - device_printf(sc->dev, "%sard detected (CID %08x%08x%08x%08x)\n", + 
device_printf(sc->dev, + "%sard detected (CID %08x%08x%08x%08x)\n", newcard ? "New c" : "C", raw_cid[0], raw_cid[1], raw_cid[2], raw_cid[3]); } @@ -1332,14 +1445,24 @@ mmc_discover_cards(struct mmc_softc *sc) if (mmcbr_get_ro(sc->dev)) ivar->read_only = 1; ivar->bus_width = bus_width_1; - ivar->timing = bus_timing_normal; + setbit(&ivar->timings, bus_timing_normal); ivar->mode = mmcbr_get_mode(sc->dev); if (ivar->mode == mode_sd) { mmc_decode_cid_sd(ivar->raw_cid, &ivar->cid); - mmc_send_relative_addr(sc, &resp); + err = mmc_send_relative_addr(sc, &resp); + if (err != MMC_ERR_NONE) { + device_printf(sc->dev, + "Error getting RCA %d\n", err); + break; + } ivar->rca = resp >> 16; /* Get card CSD. */ - mmc_send_csd(sc, ivar->rca, ivar->raw_csd); + err = mmc_send_csd(sc, ivar->rca, ivar->raw_csd); + if (err != MMC_ERR_NONE) { + device_printf(sc->dev, + "Error getting CSD %d\n", err); + break; + } if (bootverbose || mmc_debug) device_printf(sc->dev, "%sard detected (CSD %08x%08x%08x%08x)\n", @@ -1354,7 +1477,8 @@ mmc_discover_cards(struct mmc_softc *sc) ivar->erase_sector = ivar->csd.erase_sector * ivar->csd.write_bl_len / MMC_SECTOR_SIZE; - err = mmc_send_status(sc, ivar->rca, &status); + err = mmc_send_status(sc->dev, sc->dev, ivar->rca, + &status); if (err != MMC_ERR_NONE) { device_printf(sc->dev, "Error reading card status %d\n", err); @@ -1366,19 +1490,30 @@ mmc_discover_cards(struct mmc_softc *sc) break; } - /* Get card SCR. Card must be selected to fetch it. */ - mmc_select_card(sc, ivar->rca); - mmc_app_send_scr(sc, ivar->rca, ivar->raw_scr); + /* Get card SCR. Card must be selected to fetch it. 
*/ + err = mmc_select_card(sc, ivar->rca); + if (err != MMC_ERR_NONE) { + device_printf(sc->dev, + "Error selecting card %d\n", err); + break; + } + err = mmc_app_send_scr(sc, ivar->rca, ivar->raw_scr); + if (err != MMC_ERR_NONE) { + device_printf(sc->dev, + "Error reading SCR %d\n", err); + break; + } mmc_app_decode_scr(ivar->raw_scr, &ivar->scr); /* Get card switch capabilities (command class 10). */ if ((ivar->scr.sda_vsn >= 1) && - (ivar->csd.ccc & (1<<10))) { - mmc_sd_switch(sc, SD_SWITCH_MODE_CHECK, + (ivar->csd.ccc & (1 << 10))) { + err = mmc_sd_switch(sc, SD_SWITCH_MODE_CHECK, SD_SWITCH_GROUP1, SD_SWITCH_NOCHANGE, switch_res); - if (switch_res[13] & 2) { - ivar->timing = bus_timing_hs; - ivar->hs_tran_speed = SD_MAX_HS; + if (err == MMC_ERR_NONE && + switch_res[13] & (1 << SD_SWITCH_HS_MODE)) { + setbit(&ivar->timings, bus_timing_hs); + ivar->hs_tran_speed = SD_HS_MAX; } } @@ -1388,15 +1523,16 @@ mmc_discover_cards(struct mmc_softc *sc) * commands, although the state tables / diagrams in the * standard suggest they go back to the transfer state. * Other cards don't become deselected, and if we - * atttempt to blindly re-select them, we get timeout + * attempt to blindly re-select them, we get timeout * errors from some controllers. So we deselect then * reselect to handle all situations. The only thing we * use from the sd_status is the erase sector size, but * it is still nice to get that right. */ mmc_select_card(sc, 0); - mmc_select_card(sc, ivar->rca); - mmc_app_sd_status(sc, ivar->rca, ivar->raw_sd_status); + (void)mmc_select_card(sc, ivar->rca); + (void)mmc_app_sd_status(sc, ivar->rca, + ivar->raw_sd_status); mmc_app_decode_sd_status(ivar->raw_sd_status, &ivar->sd_status); if (ivar->sd_status.au_size != 0) { @@ -1404,7 +1540,7 @@ mmc_discover_cards(struct mmc_softc *sc) 16 << ivar->sd_status.au_size; } /* Find max supported bus width. 
*/ - if ((mmcbr_get_caps(sc->dev) & MMC_CAP_4_BIT_DATA) && + if ((host_caps & MMC_CAP_4_BIT_DATA) && (ivar->scr.bus_widths & SD_SCR_BUS_WIDTH_4)) ivar->bus_width = bus_width_4; @@ -1433,11 +1569,18 @@ mmc_discover_cards(struct mmc_softc *sc) mmc_select_card(sc, 0); return; } - mmc_decode_cid_mmc(ivar->raw_cid, &ivar->cid); ivar->rca = rca++; - mmc_set_relative_addr(sc, ivar->rca); + err = mmc_set_relative_addr(sc, ivar->rca); + if (err != MMC_ERR_NONE) { + device_printf(sc->dev, "Error setting RCA %d\n", err); + break; + } /* Get card CSD. */ - mmc_send_csd(sc, ivar->rca, ivar->raw_csd); + err = mmc_send_csd(sc, ivar->rca, ivar->raw_csd); + if (err != MMC_ERR_NONE) { + device_printf(sc->dev, "Error getting CSD %d\n", err); + break; + } if (bootverbose || mmc_debug) device_printf(sc->dev, "%sard detected (CSD %08x%08x%08x%08x)\n", @@ -1451,7 +1594,7 @@ mmc_discover_cards(struct mmc_softc *sc) ivar->erase_sector = ivar->csd.erase_sector * ivar->csd.write_bl_len / MMC_SECTOR_SIZE; - err = mmc_send_status(sc, ivar->rca, &status); + err = mmc_send_status(sc->dev, sc->dev, ivar->rca, &status); if (err != MMC_ERR_NONE) { device_printf(sc->dev, "Error reading card status %d\n", err); @@ -1463,11 +1606,22 @@ mmc_discover_cards(struct mmc_softc *sc) break; } - mmc_select_card(sc, ivar->rca); + err = mmc_select_card(sc, ivar->rca); + if (err != MMC_ERR_NONE) { + device_printf(sc->dev, "Error selecting card %d\n", + err); + break; + } - /* Only MMC >= 4.x cards support EXT_CSD. */ + /* Only MMC >= 4.x devices support EXT_CSD. 
*/ if (ivar->csd.spec_vers >= 4) { - mmc_send_ext_csd(sc, ivar->raw_ext_csd); + err = mmc_send_ext_csd(sc->dev, sc->dev, + ivar->raw_ext_csd); + if (err != MMC_ERR_NONE) { + device_printf(sc->dev, + "Error reading EXT_CSD %d\n", err); + break; + } /* Handle extended capacity from EXT_CSD */ sec_count = ivar->raw_ext_csd[EXT_CSD_SEC_CNT] + (ivar->raw_ext_csd[EXT_CSD_SEC_CNT + 1] << 8) + @@ -1477,28 +1631,54 @@ mmc_discover_cards(struct mmc_softc *sc) ivar->sec_count = sec_count; ivar->high_cap = 1; } - /* Get card speed in high speed mode. */ - ivar->timing = bus_timing_hs; - if (ivar->raw_ext_csd[EXT_CSD_CARD_TYPE] - & EXT_CSD_CARD_TYPE_52) - ivar->hs_tran_speed = MMC_TYPE_52_MAX_HS; - else if (ivar->raw_ext_csd[EXT_CSD_CARD_TYPE] - & EXT_CSD_CARD_TYPE_26) - ivar->hs_tran_speed = MMC_TYPE_26_MAX_HS; - else - ivar->hs_tran_speed = ivar->tran_speed; + /* Get device speeds beyond normal mode. */ + if ((ivar->raw_ext_csd[EXT_CSD_CARD_TYPE] & + EXT_CSD_CARD_TYPE_HS_52) != 0) { + setbit(&ivar->timings, bus_timing_hs); + ivar->hs_tran_speed = MMC_TYPE_HS_52_MAX; + } else if ((ivar->raw_ext_csd[EXT_CSD_CARD_TYPE] & + EXT_CSD_CARD_TYPE_HS_26) != 0) { + setbit(&ivar->timings, bus_timing_hs); + ivar->hs_tran_speed = MMC_TYPE_HS_26_MAX; + } + if ((ivar->raw_ext_csd[EXT_CSD_CARD_TYPE] & + EXT_CSD_CARD_TYPE_DDR_52_1_2V) != 0 && + (host_caps & MMC_CAP_SIGNALING_120) != 0) { + setbit(&ivar->timings, bus_timing_mmc_ddr52); + setbit(&ivar->vccq_120, bus_timing_mmc_ddr52); + } + if ((ivar->raw_ext_csd[EXT_CSD_CARD_TYPE] & + EXT_CSD_CARD_TYPE_DDR_52_1_8V) != 0 && + (host_caps & MMC_CAP_SIGNALING_180) != 0) { + setbit(&ivar->timings, bus_timing_mmc_ddr52); + setbit(&ivar->vccq_180, bus_timing_mmc_ddr52); + } + /* + * Determine generic switch timeout (provided in + * units of 10 ms), defaulting to 500 ms. + */ + ivar->cmd6_time = 500 * 1000; + if (ivar->csd.spec_vers >= 6) + ivar->cmd6_time = 10 * + ivar->raw_ext_csd[EXT_CSD_GEN_CMD6_TIME]; /* Find max supported bus width. 
*/ ivar->bus_width = mmc_test_bus_width(sc); /* Handle HC erase sector size. */ if (ivar->raw_ext_csd[EXT_CSD_ERASE_GRP_SIZE] != 0) { ivar->erase_sector = 1024 * ivar->raw_ext_csd[EXT_CSD_ERASE_GRP_SIZE]; - mmc_switch(sc, EXT_CSD_CMD_SET_NORMAL, - EXT_CSD_ERASE_GRP_DEF, 1); + err = mmc_switch(sc->dev, sc->dev, ivar->rca, + EXT_CSD_CMD_SET_NORMAL, + EXT_CSD_ERASE_GRP_DEF, + EXT_CSD_ERASE_GRP_DEF_EN, + ivar->cmd6_time, true); + if (err != MMC_ERR_NONE) { + device_printf(sc->dev, + "Error setting erase group %d\n", + err); + break; + } } - } else { - ivar->bus_width = bus_width_1; - ivar->timing = bus_timing_normal; } /* @@ -1513,6 +1693,8 @@ mmc_discover_cards(struct mmc_softc *sc) ivar->csd.write_bl_len != MMC_SECTOR_SIZE) mmc_set_blocklen(sc, MMC_SECTOR_SIZE); + mmc_decode_cid_mmc(ivar->raw_cid, &ivar->cid, + ivar->raw_ext_csd[EXT_CSD_REV] >= 5); mmc_format_card_id_string(ivar); if (bootverbose || mmc_debug) @@ -1529,7 +1711,7 @@ mmc_discover_cards(struct mmc_softc *sc) static void mmc_rescan_cards(struct mmc_softc *sc) { - struct mmc_ivars *ivar = NULL; + struct mmc_ivars *ivar; device_t *devlist; int err, i, devcount; @@ -1537,9 +1719,10 @@ mmc_rescan_cards(struct mmc_softc *sc) return; for (i = 0; i < devcount; i++) { ivar = device_get_ivars(devlist[i]); - if (mmc_select_card(sc, ivar->rca)) { + if (mmc_select_card(sc, ivar->rca) != MMC_ERR_NONE) { if (bootverbose || mmc_debug) - device_printf(sc->dev, "Card at relative address %d lost.\n", + device_printf(sc->dev, + "Card at relative address %d lost.\n", ivar->rca); device_delete_child(sc->dev, devlist[i]); free(ivar, M_DEVBUF); @@ -1561,7 +1744,8 @@ mmc_delete_cards(struct mmc_softc *sc) for (i = 0; i < devcount; i++) { ivar = device_get_ivars(devlist[i]); if (bootverbose || mmc_debug) - device_printf(sc->dev, "Card at relative address %d deleted.\n", + device_printf(sc->dev, + "Card at relative address %d deleted.\n", ivar->rca); device_delete_child(sc->dev, devlist[i]); free(ivar, M_DEVBUF); @@ -1591,7 
+1775,8 @@ mmc_go_discovery(struct mmc_softc *sc) mmc_idle_cards(sc); err = mmc_send_if_cond(sc, 1); if ((bootverbose || mmc_debug) && err == 0) - device_printf(sc->dev, "SD 2.0 interface conditions: OK\n"); + device_printf(sc->dev, + "SD 2.0 interface conditions: OK\n"); if (mmc_send_app_op_cond(sc, 0, &ocr) != MMC_ERR_NONE) { if (bootverbose || mmc_debug) device_printf(sc->dev, "SD probe: failed\n"); @@ -1601,13 +1786,15 @@ mmc_go_discovery(struct mmc_softc *sc) mmcbr_set_mode(dev, mode_mmc); if (mmc_send_op_cond(sc, 0, &ocr) != MMC_ERR_NONE) { if (bootverbose || mmc_debug) - device_printf(sc->dev, "MMC probe: failed\n"); + device_printf(sc->dev, + "MMC probe: failed\n"); ocr = 0; /* Failed both, powerdown. */ } else if (bootverbose || mmc_debug) device_printf(sc->dev, "MMC probe: OK (OCR: 0x%08x)\n", ocr); } else if (bootverbose || mmc_debug) - device_printf(sc->dev, "SD probe: OK (OCR: 0x%08x)\n", ocr); + device_printf(sc->dev, "SD probe: OK (OCR: 0x%08x)\n", + ocr); sc->squelched--; mmcbr_set_ocr(dev, mmc_select_vdd(sc, ocr)); @@ -1615,7 +1802,7 @@ mmc_go_discovery(struct mmc_softc *sc) mmc_idle_cards(sc); } else { mmcbr_set_bus_mode(dev, opendrain); - mmcbr_set_clock(dev, CARD_ID_FREQUENCY); + mmcbr_set_clock(dev, SD_MMC_CARD_ID_FREQUENCY); mmcbr_update_ios(dev); /* XXX recompute vdd based on new cards? */ } @@ -1624,7 +1811,8 @@ mmc_go_discovery(struct mmc_softc *sc) * one card on the bus. 
*/ if (bootverbose || mmc_debug) - device_printf(sc->dev, "Current OCR: 0x%08x\n", mmcbr_get_ocr(dev)); + device_printf(sc->dev, "Current OCR: 0x%08x\n", + mmcbr_get_ocr(dev)); if (mmcbr_get_ocr(dev) == 0) { device_printf(sc->dev, "No compatible cards found on bus\n"); mmc_delete_cards(sc); @@ -1646,56 +1834,69 @@ mmc_go_discovery(struct mmc_softc *sc) mmcbr_set_bus_mode(dev, pushpull); mmcbr_update_ios(dev); mmc_calculate_clock(sc); - bus_generic_attach(dev); -/* mmc_update_children_sysctl(dev);*/ } static int mmc_calculate_clock(struct mmc_softc *sc) { - int max_dtr, max_hs_dtr, max_timing; - int nkid, i, f_max; device_t *kids; struct mmc_ivars *ivar; - - f_max = mmcbr_get_f_max(sc->dev); - max_dtr = max_hs_dtr = f_max; - if ((mmcbr_get_caps(sc->dev) & MMC_CAP_HSPEED)) + int host_caps, i, nkid; + uint32_t dtr, max_dtr; + enum mmc_bus_timing max_timing, timing; + bool changed; + + max_dtr = mmcbr_get_f_max(sc->dev); + host_caps = mmcbr_get_caps(sc->dev); + if ((host_caps & MMC_CAP_MMC_DDR52) != 0) + max_timing = bus_timing_mmc_ddr52; + else if ((host_caps & MMC_CAP_HSPEED) != 0) max_timing = bus_timing_hs; else max_timing = bus_timing_normal; if (device_get_children(sc->dev, &kids, &nkid) != 0) panic("can't get children"); - for (i = 0; i < nkid; i++) { - ivar = device_get_ivars(kids[i]); - if (ivar->timing < max_timing) - max_timing = ivar->timing; - if (ivar->tran_speed < max_dtr) - max_dtr = ivar->tran_speed; - if (ivar->hs_tran_speed < max_hs_dtr) - max_hs_dtr = ivar->hs_tran_speed; + do { + changed = false; + for (i = 0; i < nkid; i++) { + ivar = device_get_ivars(kids[i]); + if (isclr(&ivar->timings, max_timing)) { + for (timing = max_timing; timing >= + bus_timing_normal; timing--) { + if (isset(&ivar->timings, timing)) { + max_timing = timing; + break; + } + } + changed = true; + } + dtr = mmc_timing_to_dtr(ivar, max_timing); + if (dtr < max_dtr) { + max_dtr = dtr; + changed = true; + } + } + } while (changed == true); + if (bootverbose || mmc_debug) { + 
device_printf(sc->dev, + "setting transfer rate to %d.%03dMHz (%s timing)\n", + max_dtr / 1000000, (max_dtr / 1000) % 1000, + mmc_timing_to_string(max_timing)); } for (i = 0; i < nkid; i++) { ivar = device_get_ivars(kids[i]); - if (ivar->timing == bus_timing_normal) + if ((ivar->timings & ~(1 << bus_timing_normal)) == 0) continue; - mmc_select_card(sc, ivar->rca); - mmc_set_timing(sc, max_timing); + if (mmc_select_card(sc, ivar->rca) != MMC_ERR_NONE || + mmc_set_timing(sc, ivar, max_timing) != MMC_ERR_NONE) + device_printf(sc->dev, "Card at relative address %d " + "failed to set timing.\n", ivar->rca); } mmc_select_card(sc, 0); free(kids, M_TEMP); - if (max_timing == bus_timing_hs) - max_dtr = max_hs_dtr; - if (bootverbose || mmc_debug) { - device_printf(sc->dev, - "setting transfer rate to %d.%03dMHz%s\n", - max_dtr / 1000000, (max_dtr / 1000) % 1000, - max_timing == bus_timing_hs ? " (high speed timing)" : ""); - } - mmcbr_set_timing(sc->dev, max_timing); mmcbr_set_clock(sc->dev, max_dtr); mmcbr_update_ios(sc->dev); - return max_dtr; + return (max_dtr); } static void @@ -1706,6 +1907,8 @@ mmc_scan(struct mmc_softc *sc) mmc_acquire_bus(dev, dev); mmc_go_discovery(sc); mmc_release_bus(dev, dev); + + bus_generic_attach(dev); } static int @@ -1716,6 +1919,9 @@ mmc_read_ivar(device_t bus, device_t child, int which, uintptr_t *result) switch (which) { default: return (EINVAL); + case MMC_IVAR_SPEC_VERS: + *result = ivar->csd.spec_vers; + break; case MMC_IVAR_DSR_IMP: *result = ivar->csd.dsr_imp; break; @@ -1762,6 +1968,7 @@ mmc_read_ivar(device_t bus, device_t child, int which, uintptr_t *result) static int mmc_write_ivar(device_t bus, device_t child, int which, uintptr_t value) { + /* * None are writable ATM */ @@ -1814,4 +2021,4 @@ driver_t mmc_driver = { }; devclass_t mmc_devclass; -MODULE_VERSION(mmc, 1); +MODULE_VERSION(mmc, MMC_VERSION); diff --git a/freebsd/sys/dev/mmc/mmc_ioctl.h b/freebsd/sys/dev/mmc/mmc_ioctl.h new file mode 100644 index 00000000..97cff068 
--- /dev/null +++ b/freebsd/sys/dev/mmc/mmc_ioctl.h @@ -0,0 +1,64 @@ +/*- + * Copyright (c) 2017 Marius Strobl + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef _DEV_MMC_MMC_IOCTL_H_ +#define _DEV_MMC_MMC_IOCTL_H_ + +struct mmc_ioc_cmd { + int write_flag; /* 0: RD, 1: WR, (1 << 31): reliable WR */ + int is_acmd; /* 0: normal, 1: use CMD55 */ + uint32_t opcode; + uint32_t arg; + uint32_t response[4]; + u_int flags; + u_int blksz; + u_int blocks; + u_int __spare[4]; + uint32_t __pad; + uint64_t data_ptr; +}; + +#define mmc_ioc_cmd_set_data(mic, ptr) \ + (mic).data_ptr = (uint64_t)(uintptr_t)(ptr) + +struct mmc_ioc_multi_cmd { + uint64_t num_of_cmds; + struct mmc_ioc_cmd cmds[0]; +}; + +#define MMC_IOC_BASE 'M' + +#define MMC_IOC_CMD _IOWR(MMC_IOC_BASE, 0, struct mmc_ioc_cmd) +#define MMC_IOC_CMD_MULTI _IOWR(MMC_IOC_BASE, 1, struct mmc_ioc_multi_cmd) + +/* Maximum accepted data transfer size */ +#define MMC_IOC_MAX_BYTES (512 * 256) +/* Maximum accepted number of commands */ +#define MMC_IOC_MAX_CMDS 255 + +#endif /* _DEV_MMC_MMC_IOCTL_H_ */ diff --git a/freebsd/sys/dev/mmc/mmc_private.h b/freebsd/sys/dev/mmc/mmc_private.h new file mode 100644 index 00000000..bbca0c60 --- /dev/null +++ b/freebsd/sys/dev/mmc/mmc_private.h @@ -0,0 +1,69 @@ +/*- + * Copyright (c) 2006 Bernd Walter. All rights reserved. + * Copyright (c) 2006 M. Warner Losh. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Portions of this software may have been developed with reference to + * the SD Simplified Specification. The following disclaimer may apply: + * + * The following conditions apply to the release of the simplified + * specification ("Simplified Specification") by the SD Card Association and + * the SD Group. The Simplified Specification is a subset of the complete SD + * Specification which is owned by the SD Card Association and the SD + * Group. This Simplified Specification is provided on a non-confidential + * basis subject to the disclaimers below. Any implementation of the + * Simplified Specification may require a license from the SD Card + * Association, SD Group, SD-3C LLC or other third parties. + * + * Disclaimers: + * + * The information contained in the Simplified Specification is presented only + * as a standard specification for SD Cards and SD Host/Ancillary products and + * is provided "AS-IS" without any representations or warranties of any + * kind. No responsibility is assumed by the SD Group, SD-3C LLC or the SD + * Card Association for any damages, any infringements of patents or other + * right of the SD Group, SD-3C LLC, the SD Card Association or any third + * parties, which may result from its use. No license is granted by + * implication, estoppel or otherwise under any patent or other rights of the + * SD Group, SD-3C LLC, the SD Card Association or any third party. 
Nothing + * herein shall be construed as an obligation by the SD Group, the SD-3C LLC + * or the SD Card Association to disclose or distribute any technical + * information, know-how or other confidential information to any third party. + * + * $FreeBSD$ + */ + +#ifndef DEV_MMC_PRIVATE_H +#define DEV_MMC_PRIVATE_H + +struct mmc_softc { + device_t dev; + struct mtx sc_mtx; + struct intr_config_hook config_intrhook; + device_t owner; + uint32_t last_rca; + int squelched; /* suppress reporting of (expected) errors */ + int log_count; + struct timeval log_time; +}; + +#endif /* DEV_MMC_PRIVATE_H */ diff --git a/freebsd/sys/dev/mmc/mmc_subr.c b/freebsd/sys/dev/mmc/mmc_subr.c new file mode 100644 index 00000000..294fd9c0 --- /dev/null +++ b/freebsd/sys/dev/mmc/mmc_subr.c @@ -0,0 +1,254 @@ +#include + +/*- + * Copyright (c) 2006 Bernd Walter. All rights reserved. + * Copyright (c) 2006 M. Warner Losh. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Portions of this software may have been developed with reference to + * the SD Simplified Specification. The following disclaimer may apply: + * + * The following conditions apply to the release of the simplified + * specification ("Simplified Specification") by the SD Card Association and + * the SD Group. The Simplified Specification is a subset of the complete SD + * Specification which is owned by the SD Card Association and the SD + * Group. This Simplified Specification is provided on a non-confidential + * basis subject to the disclaimers below. Any implementation of the + * Simplified Specification may require a license from the SD Card + * Association, SD Group, SD-3C LLC or other third parties. + * + * Disclaimers: + * + * The information contained in the Simplified Specification is presented only + * as a standard specification for SD Cards and SD Host/Ancillary products and + * is provided "AS-IS" without any representations or warranties of any + * kind. No responsibility is assumed by the SD Group, SD-3C LLC or the SD + * Card Association for any damages, any infringements of patents or other + * right of the SD Group, SD-3C LLC, the SD Card Association or any third + * parties, which may result from its use. No license is granted by + * implication, estoppel or otherwise under any patent or other rights of the + * SD Group, SD-3C LLC, the SD Card Association or any third party. 
Nothing + * herein shall be construed as an obligation by the SD Group, the SD-3C LLC + * or the SD Card Association to disclose or distribute any technical + * information, know-how or other confidential information to any third party. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +#define CMD_RETRIES 3 +#define LOG_PPS 5 /* Log no more than 5 errors per second. */ + +int +mmc_wait_for_cmd(device_t brdev, device_t reqdev, struct mmc_command *cmd, + int retries) +{ + struct mmc_request mreq; + struct mmc_softc *sc; + int err; + + do { + memset(&mreq, 0, sizeof(mreq)); + memset(cmd->resp, 0, sizeof(cmd->resp)); + cmd->retries = 0; /* Retries done here, not in hardware. */ + cmd->mrq = &mreq; + if (cmd->data != NULL) + cmd->data->mrq = &mreq; + mreq.cmd = cmd; + if (MMCBUS_WAIT_FOR_REQUEST(brdev, reqdev, &mreq) != 0) + err = MMC_ERR_FAILED; + else + err = cmd->error; + } while (err != MMC_ERR_NONE && retries-- > 0); + + if (err != MMC_ERR_NONE && brdev == reqdev) { + sc = device_get_softc(brdev); + if (sc->squelched == 0 && ppsratecheck(&sc->log_time, + &sc->log_count, LOG_PPS)) { + device_printf(sc->dev, "CMD%d failed, RESULT: %d\n", + cmd->opcode, err); + } + } + + return (err); +} + +int +mmc_wait_for_app_cmd(device_t brdev, device_t reqdev, uint16_t rca, + struct mmc_command *cmd, int retries) +{ + struct mmc_command appcmd; + struct mmc_softc *sc; + int err; + + sc = device_get_softc(brdev); + + /* Squelch error reporting at lower levels, we report below. 
*/ + sc->squelched++; + do { + memset(&appcmd, 0, sizeof(appcmd)); + appcmd.opcode = MMC_APP_CMD; + appcmd.arg = (uint32_t)rca << 16; + appcmd.flags = MMC_RSP_R1 | MMC_CMD_AC; + if (mmc_wait_for_cmd(brdev, reqdev, &appcmd, 0) != 0) + err = MMC_ERR_FAILED; + else + err = appcmd.error; + if (err == MMC_ERR_NONE) { + if (!(appcmd.resp[0] & R1_APP_CMD)) + err = MMC_ERR_FAILED; + else if (mmc_wait_for_cmd(brdev, reqdev, cmd, 0) != 0) + err = MMC_ERR_FAILED; + else + err = cmd->error; + } + } while (err != MMC_ERR_NONE && retries-- > 0); + sc->squelched--; + + if (err != MMC_ERR_NONE && brdev == reqdev) { + sc = device_get_softc(brdev); + if (sc->squelched == 0 && ppsratecheck(&sc->log_time, + &sc->log_count, LOG_PPS)) { + device_printf(sc->dev, "ACMD%d failed, RESULT: %d\n", + cmd->opcode, err); + } + } + + return (err); +} + +int +mmc_switch(device_t brdev, device_t reqdev, uint16_t rca, uint8_t set, + uint8_t index, uint8_t value, u_int timeout, bool status) +{ + struct mmc_command cmd; + int err; + + KASSERT(timeout != 0, ("%s: no timeout", __func__)); + + memset(&cmd, 0, sizeof(cmd)); + cmd.opcode = MMC_SWITCH_FUNC; + cmd.arg = (MMC_SWITCH_FUNC_WR << 24) | (index << 16) | (value << 8) | + set; + /* + * If the hardware supports busy detection but the switch timeout + * exceeds the maximum host timeout, use a R1 instead of a R1B + * response in order to keep the hardware from timing out. 
+ */ + if (mmcbr_get_caps(brdev) & MMC_CAP_WAIT_WHILE_BUSY && + timeout > mmcbr_get_max_busy_timeout(brdev)) + cmd.flags = MMC_RSP_R1 | MMC_CMD_AC; + else + cmd.flags = MMC_RSP_R1B | MMC_CMD_AC; + err = mmc_wait_for_cmd(brdev, reqdev, &cmd, CMD_RETRIES); + if (err != MMC_ERR_NONE || status == false) + return (err); + return (mmc_switch_status(brdev, reqdev, rca, timeout)); +} + +int +mmc_switch_status(device_t brdev, device_t reqdev, uint16_t rca, u_int timeout) +{ + struct timeval cur, end = { 0, 0 }; /* all-zero end == deadline not armed yet (tested in the loop) */ + int err; + uint32_t status; + + KASSERT(timeout != 0, ("%s: no timeout", __func__)); + + /* + * Note that when using a R1B response in mmc_switch(), bridges of + * type MMC_CAP_WAIT_WHILE_BUSY will issue mmc_send_status() only + * once and then exit the loop. + */ + for (;;) { + err = mmc_send_status(brdev, reqdev, rca, &status); + if (err != MMC_ERR_NONE) + break; + if (R1_CURRENT_STATE(status) == R1_STATE_TRAN) + break; + getmicrouptime(&cur); + if (end.tv_sec == 0 && end.tv_usec == 0) { + end.tv_usec = timeout; + timevaladd(&end, &cur); + } + if (timevalcmp(&cur, &end, >)) { + err = MMC_ERR_TIMEOUT; + break; + } + } + if (err == MMC_ERR_NONE && (status & R1_SWITCH_ERROR) != 0) /* SWITCH_ERROR is a status bit, not a CURRENT_STATE value */ + return (MMC_ERR_FAILED); + return (err); +} + +int +mmc_send_ext_csd(device_t brdev, device_t reqdev, uint8_t *rawextcsd) +{ + struct mmc_command cmd; + struct mmc_data data; + int err; + + memset(&cmd, 0, sizeof(cmd)); + memset(&data, 0, sizeof(data)); + + memset(rawextcsd, 0, MMC_EXTCSD_SIZE); + cmd.opcode = MMC_SEND_EXT_CSD; + cmd.flags = MMC_RSP_R1 | MMC_CMD_ADTC; + cmd.data = &data; + + data.data = rawextcsd; + data.len = MMC_EXTCSD_SIZE; + data.flags = MMC_DATA_READ; + + err = mmc_wait_for_cmd(brdev, reqdev, &cmd, CMD_RETRIES); + return (err); +} + +int +mmc_send_status(device_t brdev, device_t reqdev, uint16_t rca, uint32_t *status) +{ + struct mmc_command cmd; + int err; + + memset(&cmd, 0, sizeof(cmd)); + cmd.opcode = MMC_SEND_STATUS; + cmd.arg = (uint32_t)rca << 16; +
cmd.flags = MMC_RSP_R1 | MMC_CMD_AC; + err = mmc_wait_for_cmd(brdev, reqdev, &cmd, CMD_RETRIES); + *status = cmd.resp[0]; + return (err); +} diff --git a/freebsd/sys/dev/mmc/mmc_subr.h b/freebsd/sys/dev/mmc/mmc_subr.h new file mode 100644 index 00000000..6e300d2f --- /dev/null +++ b/freebsd/sys/dev/mmc/mmc_subr.h @@ -0,0 +1,72 @@ +/*- + * Copyright (c) 2006 Bernd Walter. All rights reserved. + * Copyright (c) 2006 M. Warner Losh. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Portions of this software may have been developed with reference to + * the SD Simplified Specification. 
The following disclaimer may apply: + * + * The following conditions apply to the release of the simplified + * specification ("Simplified Specification") by the SD Card Association and + * the SD Group. The Simplified Specification is a subset of the complete SD + * Specification which is owned by the SD Card Association and the SD + * Group. This Simplified Specification is provided on a non-confidential + * basis subject to the disclaimers below. Any implementation of the + * Simplified Specification may require a license from the SD Card + * Association, SD Group, SD-3C LLC or other third parties. + * + * Disclaimers: + * + * The information contained in the Simplified Specification is presented only + * as a standard specification for SD Cards and SD Host/Ancillary products and + * is provided "AS-IS" without any representations or warranties of any + * kind. No responsibility is assumed by the SD Group, SD-3C LLC or the SD + * Card Association for any damages, any infringements of patents or other + * right of the SD Group, SD-3C LLC, the SD Card Association or any third + * parties, which may result from its use. No license is granted by + * implication, estoppel or otherwise under any patent or other rights of the + * SD Group, SD-3C LLC, the SD Card Association or any third party. Nothing + * herein shall be construed as an obligation by the SD Group, the SD-3C LLC + * or the SD Card Association to disclose or distribute any technical + * information, know-how or other confidential information to any third party. 
+ * + * $FreeBSD$ + */ + +#ifndef DEV_MMC_SUBR_H +#define DEV_MMC_SUBR_H + +struct mmc_command; + +int mmc_send_ext_csd(device_t brdev, device_t reqdev, uint8_t *rawextcsd); +int mmc_send_status(device_t brdev, device_t reqdev, uint16_t rca, + uint32_t *status); +int mmc_switch(device_t brdev, device_t reqdev, uint16_t rca, uint8_t set, + uint8_t index, uint8_t value, u_int timeout, bool send_status); +int mmc_switch_status(device_t brdev, device_t reqdev, uint16_t rca, + u_int timeout); +int mmc_wait_for_app_cmd(device_t brdev, device_t reqdev, uint16_t rca, + struct mmc_command *cmd, int retries); +int mmc_wait_for_cmd(device_t brdev, device_t reqdev, struct mmc_command *cmd, + int retries); + +#endif /* DEV_MMC_SUBR_H */ diff --git a/freebsd/sys/dev/mmc/mmcbrvar.h b/freebsd/sys/dev/mmc/mmcbrvar.h index 1f0a5714..77c304b4 100644 --- a/freebsd/sys/dev/mmc/mmcbrvar.h +++ b/freebsd/sys/dev/mmc/mmcbrvar.h @@ -49,14 +49,14 @@ * or the SD Card Association to disclose or distribute any technical * information, know-how or other confidential information to any third party. 
* - * "$FreeBSD$" + * $FreeBSD$ */ #ifndef DEV_MMC_MMCBRVAR_H -#define DEV_MMC_MMCBRVAR_H +#define DEV_MMC_MMCBRVAR_H -#include #include + #include enum mmcbr_device_ivars { @@ -71,15 +71,17 @@ enum mmcbr_device_ivars { MMCBR_IVAR_OCR, MMCBR_IVAR_POWER_MODE, MMCBR_IVAR_VDD, + MMCBR_IVAR_VCCQ, MMCBR_IVAR_CAPS, MMCBR_IVAR_TIMING, - MMCBR_IVAR_MAX_DATA + MMCBR_IVAR_MAX_DATA, + MMCBR_IVAR_MAX_BUSY_TIMEOUT }; /* - * Simplified accessors for pci devices + * Simplified accessors for bridge devices */ -#define MMCBR_ACCESSOR(var, ivar, type) \ +#define MMCBR_ACCESSOR(var, ivar, type) \ __BUS_ACCESSOR(mmcbr, var, MMCBR, ivar, type) MMCBR_ACCESSOR(bus_mode, BUS_MODE, int) @@ -93,19 +95,30 @@ MMCBR_ACCESSOR(mode, MODE, int) MMCBR_ACCESSOR(ocr, OCR, int) MMCBR_ACCESSOR(power_mode, POWER_MODE, int) MMCBR_ACCESSOR(vdd, VDD, int) +MMCBR_ACCESSOR(vccq, VCCQ, int) MMCBR_ACCESSOR(caps, CAPS, int) MMCBR_ACCESSOR(timing, TIMING, int) MMCBR_ACCESSOR(max_data, MAX_DATA, int) +MMCBR_ACCESSOR(max_busy_timeout, MAX_BUSY_TIMEOUT, u_int) static int __inline mmcbr_update_ios(device_t dev) { + return (MMCBR_UPDATE_IOS(device_get_parent(dev), dev)); } +static int __inline +mmcbr_switch_vccq(device_t dev) +{ + + return (MMCBR_SWITCH_VCCQ(device_get_parent(dev), dev)); +} + static int __inline mmcbr_get_ro(device_t dev) { + return (MMCBR_GET_RO(device_get_parent(dev), dev)); } diff --git a/freebsd/sys/dev/mmc/mmcreg.h b/freebsd/sys/dev/mmc/mmcreg.h index ba4ca93a..359f31d5 100644 --- a/freebsd/sys/dev/mmc/mmcreg.h +++ b/freebsd/sys/dev/mmc/mmcreg.h @@ -1,5 +1,6 @@ /*- * Copyright (c) 2006 M. Warner Losh. All rights reserved. + * Copyright (c) 2017 Marius Strobl * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -55,7 +56,7 @@ #define DEV_MMC_MMCREG_H /* - * This file contains the register definitions for the mmc and sd busses. + * This file contains the register definitions for the mmc and sd buses. 
* They are taken from publicly available sources. */ @@ -100,7 +101,7 @@ struct mmc_command { #define MMC_ERR_FAILED 4 #define MMC_ERR_INVALID 5 #define MMC_ERR_NO_MEMORY 6 -#define MMC_ERR_MAX 6 +#define MMC_ERR_MAX 6 struct mmc_data *data; /* Data segment with cmd */ struct mmc_request *mrq; /* backpointer to request */ }; @@ -140,6 +141,7 @@ struct mmc_command { #define R1_ERASE_RESET (1u << 13) /* sr, c */ #define R1_CURRENT_STATE_MASK (0xfu << 9) /* sx, b */ #define R1_READY_FOR_DATA (1u << 8) /* sx, a */ +#define R1_SWITCH_ERROR (1u << 7) /* sx, c */ #define R1_APP_CMD (1u << 5) /* sr, c */ #define R1_AKE_SEQ_ERROR (1u << 3) /* er, c */ #define R1_STATUS(x) ((x) & 0xFFFFE000) @@ -184,7 +186,7 @@ struct mmc_request { #define MMC_SET_RELATIVE_ADDR 3 #define SD_SEND_RELATIVE_ADDR 3 #define MMC_SET_DSR 4 - /* reserved: 5 */ +#define MMC_SLEEP_AWAKE 5 #define MMC_SWITCH_FUNC 6 #define MMC_SWITCH_FUNC_CMDS 0 #define MMC_SWITCH_FUNC_SET 1 @@ -207,11 +209,11 @@ struct mmc_request { #define MMC_SET_BLOCKLEN 16 #define MMC_READ_SINGLE_BLOCK 17 #define MMC_READ_MULTIPLE_BLOCK 18 - /* reserved: 19 */ +#define MMC_SEND_TUNING_BLOCK 19 +#define MMC_SEND_TUNING_BLOCK_HS200 21 /* Class 3: Stream write commands */ #define MMC_WRITE_DAT_UNTIL_STOP 20 - /* reserved: 21 */ /* reserved: 22 */ /* Class 4: Block oriented write commands */ @@ -278,7 +280,6 @@ struct mmc_request { /* reserved: 50 */ /* reserved: 57 */ - /* Application specific commands for SD */ #define ACMD_SET_BUS_WIDTH 6 #define ACMD_SD_STATUS 13 @@ -291,52 +292,153 @@ struct mmc_request { /* * EXT_CSD fields */ -#define EXT_CSD_ERASE_GRP_DEF 175 /* R/W */ -#define EXT_CSD_BUS_WIDTH 183 /* R/W */ -#define EXT_CSD_HS_TIMING 185 /* R/W */ -#define EXT_CSD_CARD_TYPE 196 /* RO */ -#define EXT_CSD_REV 192 /* RO */ -#define EXT_CSD_SEC_CNT 212 /* RO, 4 bytes */ -#define EXT_CSD_ERASE_TO_MULT 223 /* RO */ -#define EXT_CSD_ERASE_GRP_SIZE 224 /* RO */ +#define EXT_CSD_EXT_PART_ATTR 52 /* R/W, 2 bytes */ +#define 
EXT_CSD_ENH_START_ADDR 136 /* R/W, 4 bytes */ +#define EXT_CSD_ENH_SIZE_MULT 140 /* R/W, 3 bytes */ +#define EXT_CSD_GP_SIZE_MULT 143 /* R/W, 12 bytes */ +#define EXT_CSD_PART_SET 155 /* R/W */ +#define EXT_CSD_PART_ATTR 156 /* R/W */ +#define EXT_CSD_PART_SUPPORT 160 /* RO */ +#define EXT_CSD_RPMB_MULT 168 /* RO */ +#define EXT_CSD_BOOT_WP_STATUS 174 /* RO */ +#define EXT_CSD_ERASE_GRP_DEF 175 /* R/W */ +#define EXT_CSD_PART_CONFIG 179 /* R/W */ +#define EXT_CSD_BUS_WIDTH 183 /* R/W */ +#define EXT_CSD_STROBE_SUPPORT 184 /* RO */ +#define EXT_CSD_HS_TIMING 185 /* R/W */ +#define EXT_CSD_POWER_CLASS 187 /* R/W */ +#define EXT_CSD_CARD_TYPE 196 /* RO */ +#define EXT_CSD_DRIVER_STRENGTH 197 /* RO */ +#define EXT_CSD_REV 192 /* RO */ +#define EXT_CSD_PART_SWITCH_TO 199 /* RO */ +#define EXT_CSD_PWR_CL_52_195 200 /* RO */ +#define EXT_CSD_PWR_CL_26_195 201 /* RO */ +#define EXT_CSD_PWR_CL_52_360 202 /* RO */ +#define EXT_CSD_PWR_CL_26_360 203 /* RO */ +#define EXT_CSD_SEC_CNT 212 /* RO, 4 bytes */ +#define EXT_CSD_HC_WP_GRP_SIZE 221 /* RO */ +#define EXT_CSD_ERASE_TO_MULT 223 /* RO */ +#define EXT_CSD_ERASE_GRP_SIZE 224 /* RO */ +#define EXT_CSD_BOOT_SIZE_MULT 226 /* RO */ +#define EXT_CSD_PWR_CL_200_195 236 /* RO */ +#define EXT_CSD_PWR_CL_200_360 237 /* RO */ +#define EXT_CSD_PWR_CL_52_195_DDR 238 /* RO */ +#define EXT_CSD_PWR_CL_52_360_DDR 239 /* RO */ +#define EXT_CSD_GEN_CMD6_TIME 248 /* RO */ +#define EXT_CSD_PWR_CL_200_360_DDR 253 /* RO */ /* * EXT_CSD field definitions */ -#define EXT_CSD_CMD_SET_NORMAL 1 -#define EXT_CSD_CMD_SET_SECURE 2 -#define EXT_CSD_CMD_SET_CPSECURE 4 - -#define EXT_CSD_CARD_TYPE_26 1 -#define EXT_CSD_CARD_TYPE_52 2 - -#define EXT_CSD_BUS_WIDTH_1 0 -#define EXT_CSD_BUS_WIDTH_4 1 -#define EXT_CSD_BUS_WIDTH_8 2 - -#define MMC_TYPE_26_MAX_HS 26000000 -#define MMC_TYPE_52_MAX_HS 52000000 +#define EXT_CSD_EXT_PART_ATTR_DEFAULT 0x0 +#define EXT_CSD_EXT_PART_ATTR_SYSTEMCODE 0x1 +#define EXT_CSD_EXT_PART_ATTR_NPERSISTENT 0x2 + +#define 
EXT_CSD_PART_SET_COMPLETED 0x01 + +#define EXT_CSD_PART_ATTR_ENH_USR 0x01 +#define EXT_CSD_PART_ATTR_ENH_GP0 0x02 +#define EXT_CSD_PART_ATTR_ENH_GP1 0x04 +#define EXT_CSD_PART_ATTR_ENH_GP2 0x08 +#define EXT_CSD_PART_ATTR_ENH_GP3 0x10 +#define EXT_CSD_PART_ATTR_ENH_MASK 0x1f + +#define EXT_CSD_PART_SUPPORT_EN 0x01 +#define EXT_CSD_PART_SUPPORT_ENH_ATTR_EN 0x02 +#define EXT_CSD_PART_SUPPORT_EXT_ATTR_EN 0x04 + +#define EXT_CSD_BOOT_WP_STATUS_BOOT0_PWR 0x01 +#define EXT_CSD_BOOT_WP_STATUS_BOOT0_PERM 0x02 +#define EXT_CSD_BOOT_WP_STATUS_BOOT0_MASK 0x03 +#define EXT_CSD_BOOT_WP_STATUS_BOOT1_PWR 0x04 +#define EXT_CSD_BOOT_WP_STATUS_BOOT1_PERM 0x08 +#define EXT_CSD_BOOT_WP_STATUS_BOOT1_MASK 0x0c + +#define EXT_CSD_ERASE_GRP_DEF_EN 0x01 + +#define EXT_CSD_PART_CONFIG_ACC_DEFAULT 0x00 +#define EXT_CSD_PART_CONFIG_ACC_BOOT0 0x01 +#define EXT_CSD_PART_CONFIG_ACC_BOOT1 0x02 +#define EXT_CSD_PART_CONFIG_ACC_RPMB 0x03 +#define EXT_CSD_PART_CONFIG_ACC_GP0 0x04 +#define EXT_CSD_PART_CONFIG_ACC_GP1 0x05 +#define EXT_CSD_PART_CONFIG_ACC_GP2 0x06 +#define EXT_CSD_PART_CONFIG_ACC_GP3 0x07 +#define EXT_CSD_PART_CONFIG_ACC_MASK 0x07 +#define EXT_CSD_PART_CONFIG_BOOT0 0x08 +#define EXT_CSD_PART_CONFIG_BOOT1 0x10 +#define EXT_CSD_PART_CONFIG_BOOT_USR 0x38 +#define EXT_CSD_PART_CONFIG_BOOT_MASK 0x38 +#define EXT_CSD_PART_CONFIG_BOOT_ACK 0x40 + +#define EXT_CSD_CMD_SET_NORMAL 1 +#define EXT_CSD_CMD_SET_SECURE 2 +#define EXT_CSD_CMD_SET_CPSECURE 4 + +#define EXT_CSD_HS_TIMING_BC 0 +#define EXT_CSD_HS_TIMING_HS 1 +#define EXT_CSD_HS_TIMING_DDR200 2 +#define EXT_CSD_HS_TIMING_DDR400 3 +#define EXT_CSD_HS_TIMING_DRV_STR_SHIFT 4 + +#define EXT_CSD_POWER_CLASS_8BIT_MASK 0xf0 +#define EXT_CSD_POWER_CLASS_8BIT_SHIFT 4 +#define EXT_CSD_POWER_CLASS_4BIT_MASK 0x0f +#define EXT_CSD_POWER_CLASS_4BIT_SHIFT 0 + +#define EXT_CSD_CARD_TYPE_HS_26 0x0001 +#define EXT_CSD_CARD_TYPE_HS_52 0x0002 +#define EXT_CSD_CARD_TYPE_DDR_52_1_8V 0x0004 +#define EXT_CSD_CARD_TYPE_DDR_52_1_2V 0x0008 +#define 
EXT_CSD_CARD_TYPE_HS200_1_8V 0x0010 +#define EXT_CSD_CARD_TYPE_HS200_1_2V 0x0020 +#define EXT_CSD_CARD_TYPE_HS400_1_8V 0x0040 +#define EXT_CSD_CARD_TYPE_HS400_1_2V 0x0080 +#define EXT_CSD_CARD_TYPE_HS400ES 0x0100 + +#define EXT_CSD_BUS_WIDTH_1 0 +#define EXT_CSD_BUS_WIDTH_4 1 +#define EXT_CSD_BUS_WIDTH_8 2 +#define EXT_CSD_BUS_WIDTH_4_DDR 5 +#define EXT_CSD_BUS_WIDTH_8_DDR 6 +#define EXT_CSD_BUS_WIDTH_ES 0x80 + +#define MMC_TYPE_HS_26_MAX 26000000 +#define MMC_TYPE_HS_52_MAX 52000000 +#define MMC_TYPE_DDR52_MAX 52000000 +#define MMC_TYPE_HS200_HS400ES_MAX 200000000 /* * SD bus widths */ -#define SD_BUS_WIDTH_1 0 -#define SD_BUS_WIDTH_4 2 +#define SD_BUS_WIDTH_1 0 +#define SD_BUS_WIDTH_4 2 /* * SD Switch */ -#define SD_SWITCH_MODE_CHECK 0 -#define SD_SWITCH_MODE_SET 1 -#define SD_SWITCH_GROUP1 0 -#define SD_SWITCH_NORMAL_MODE 0 -#define SD_SWITCH_HS_MODE 1 -#define SD_SWITCH_NOCHANGE 0xF +#define SD_SWITCH_MODE_CHECK 0 +#define SD_SWITCH_MODE_SET 1 +#define SD_SWITCH_GROUP1 0 +#define SD_SWITCH_NORMAL_MODE 0 +#define SD_SWITCH_HS_MODE 1 +#define SD_SWITCH_SDR50_MODE 2 +#define SD_SWITCH_SDR104_MODE 3 +#define SD_SWITCH_DDR50 4 +#define SD_SWITCH_NOCHANGE 0xF #define SD_CLR_CARD_DETECT 0 #define SD_SET_CARD_DETECT 1 -#define SD_MAX_HS 50000000 +#define SD_HS_MAX 50000000 +#define SD_DDR50_MAX 50000000 +#define SD_SDR12_MAX 25000000 +#define SD_SDR25_MAX 50000000 +#define SD_SDR50_MAX 100000000 +#define SD_SDR104_MAX 208000000 + +/* Specifications require 400 kHz max. during ID phase. 
*/ +#define SD_MMC_CARD_ID_FREQUENCY 400000 /* OCR bits */ @@ -373,6 +475,12 @@ struct mmc_request { #define MMC_OCR_340_350 (1U << 22) /* Vdd voltage 3.40 ~ 3.50 */ #define MMC_OCR_350_360 (1U << 23) /* Vdd voltage 3.50 ~ 3.60 */ #define MMC_OCR_MAX_VOLTAGE_SHIFT 23 +#define MMC_OCR_S18R (1U << 24) /* Switching to 1.8 V requested (SD) */ +#define MMC_OCR_S18A MMC_OCR_S18R /* Switching to 1.8 V accepted (SD) */ +#define MMC_OCR_XPC (1U << 28) /* SDXC Power Control */ +#define MMC_OCR_ACCESS_MODE_BYTE (0U << 29) /* Access Mode Byte (MMC) */ +#define MMC_OCR_ACCESS_MODE_SECT (1U << 29) /* Access Mode Sector (MMC) */ +#define MMC_OCR_ACCESS_MODE_MASK (3U << 29) #define MMC_OCR_CCS (1u << 30) /* Card Capacity status (SD vs SDHC) */ #define MMC_OCR_CARD_BUSY (1U << 31) /* Card Power up status */ @@ -419,8 +527,8 @@ struct mmc_scr { unsigned char sda_vsn; unsigned char bus_widths; -#define SD_SCR_BUS_WIDTH_1 (1<<0) -#define SD_SCR_BUS_WIDTH_4 (1<<2) +#define SD_SCR_BUS_WIDTH_1 (1 << 0) +#define SD_SCR_BUS_WIDTH_4 (1 << 2) }; struct mmc_sd_status @@ -437,12 +545,22 @@ struct mmc_sd_status uint8_t erase_offset; }; +/* + * Various MMC/SD constants + */ +#define MMC_BOOT_RPMB_BLOCK_SIZE (128 * 1024) + +#define MMC_EXTCSD_SIZE 512 + +#define MMC_PART_GP_MAX 4 +#define MMC_PART_MAX 8 + /* * Older versions of the MMC standard had a variable sector size. However, * I've been able to find no old MMC or SD cards that have a non 512 * byte sector size anywhere, so we assume that such cards are very rare * and only note their existence in passing here... */ -#define MMC_SECTOR_SIZE 512 +#define MMC_SECTOR_SIZE 512 #endif /* DEV_MMCREG_H */ diff --git a/freebsd/sys/dev/mmc/mmcsd.c b/freebsd/sys/dev/mmc/mmcsd.c index a39d51fe..c1cfbe8b 100644 --- a/freebsd/sys/dev/mmc/mmcsd.c +++ b/freebsd/sys/dev/mmc/mmcsd.c @@ -3,6 +3,7 @@ /*- * Copyright (c) 2006 Bernd Walter. All rights reserved. * Copyright (c) 2006 M. Warner Losh. All rights reserved. 
+ * Copyright (c) 2017 Marius Strobl * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -60,16 +61,23 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include +#include #include #include #include #include #include #include +#include #include + #include #include +#include +#include +#include #include #include #include @@ -88,19 +96,48 @@ __FBSDID("$FreeBSD$"); #define kproc_exit kthread_exit #endif -struct mmcsd_softc { - device_t dev; - struct mtx sc_mtx; +#define MMCSD_CMD_RETRIES 5 + +#define MMCSD_FMT_BOOT "mmcsd%dboot" +#define MMCSD_FMT_GP "mmcsd%dgp" +#define MMCSD_FMT_RPMB "mmcsd%drpmb" +#define MMCSD_LABEL_ENH "enh" + +#define MMCSD_PART_NAMELEN (16 + 1) + +struct mmcsd_softc; + +struct mmcsd_part { + struct mtx part_mtx; + struct mmcsd_softc *sc; #ifndef __rtems__ struct disk *disk; struct proc *p; struct bio_queue_head bio_queue; daddr_t eblock, eend; /* Range remaining after the last erase. */ +#endif /* __rtems__ */ + u_int cnt; + u_int type; int running; int suspend; + bool ro; + char name[MMCSD_PART_NAMELEN]; +}; + +struct mmcsd_softc { + device_t dev; + device_t mmcbr; + struct mmcsd_part *part[MMC_PART_MAX]; + enum mmc_card_mode mode; + uint8_t part_curr; /* Partition currently switched to */ + uint8_t ext_csd[MMC_EXTCSD_SIZE]; + uint16_t rca; + uint32_t part_time; /* Partition switch timeout [us] */ + off_t enh_base; /* Enhanced user data area slice base ... */ + off_t enh_size; /* ... 
and size [bytes] */ int log_count; struct timeval log_time; -#endif /* __rtems__ */ + struct cdev *rpmb_dev; }; #ifndef __rtems__ @@ -127,26 +164,50 @@ static int mmcsd_probe(device_t dev); /* disk routines */ static int mmcsd_close(struct disk *dp); static int mmcsd_dump(void *arg, void *virtual, vm_offset_t physical, - off_t offset, size_t length); + off_t offset, size_t length); +static int mmcsd_getattr(struct bio *); +static int mmcsd_ioctl_disk(struct disk *disk, u_long cmd, void *data, + int fflag, struct thread *td); static int mmcsd_open(struct disk *dp); static void mmcsd_strategy(struct bio *bp); static void mmcsd_task(void *arg); #endif /* __rtems__ */ +/* RPMB cdev interface */ +static int mmcsd_ioctl_rpmb(struct cdev *dev, u_long cmd, caddr_t data, + int fflag, struct thread *td); + +static void mmcsd_add_part(struct mmcsd_softc *sc, u_int type, + const char *name, u_int cnt, off_t media_size, off_t erase_size, bool ro); static int mmcsd_bus_bit_width(device_t dev); #ifndef __rtems__ -static daddr_t mmcsd_delete(struct mmcsd_softc *sc, struct bio *bp); -static daddr_t mmcsd_rw(struct mmcsd_softc *sc, struct bio *bp); +static daddr_t mmcsd_delete(struct mmcsd_part *part, struct bio *bp); #endif /* __rtems__ */ - -#define MMCSD_LOCK(_sc) mtx_lock(&(_sc)->sc_mtx) -#define MMCSD_UNLOCK(_sc) mtx_unlock(&(_sc)->sc_mtx) -#define MMCSD_LOCK_INIT(_sc) \ - mtx_init(&_sc->sc_mtx, device_get_nameunit(_sc->dev), \ - "mmcsd", MTX_DEF) -#define MMCSD_LOCK_DESTROY(_sc) mtx_destroy(&_sc->sc_mtx); -#define MMCSD_ASSERT_LOCKED(_sc) mtx_assert(&_sc->sc_mtx, MA_OWNED); -#define MMCSD_ASSERT_UNLOCKED(_sc) mtx_assert(&_sc->sc_mtx, MA_NOTOWNED); +static int mmcsd_ioctl(struct mmcsd_part *part, u_long cmd, void *data, + int fflag); +static int mmcsd_ioctl_cmd(struct mmcsd_part *part, struct mmc_ioc_cmd *mic, + int fflag); +static uintmax_t mmcsd_pretty_size(off_t size, char *unit); +#ifndef __rtems__ +static daddr_t mmcsd_rw(struct mmcsd_part *part, struct bio *bp); +#endif
/* __rtems__ */ +static int mmcsd_set_blockcount(struct mmcsd_softc *sc, u_int count, bool rel); +#ifndef __rtems__ +static int mmcsd_slicer(device_t dev, const char *provider, + struct flash_slice *slices, int *nslices); +#endif /* __rtems__ */ +static int mmcsd_switch_part(device_t bus, device_t dev, uint16_t rca, + u_int part); + +#define MMCSD_PART_LOCK(_part) mtx_lock(&(_part)->part_mtx) +#define MMCSD_PART_UNLOCK(_part) mtx_unlock(&(_part)->part_mtx) +#define MMCSD_PART_LOCK_INIT(_part) \ + mtx_init(&(_part)->part_mtx, (_part)->name, "mmcsd part", MTX_DEF) +#define MMCSD_PART_LOCK_DESTROY(_part) mtx_destroy(&(_part)->part_mtx); +#define MMCSD_PART_ASSERT_LOCKED(_part) \ + mtx_assert(&(_part)->part_mtx, MA_OWNED); +#define MMCSD_PART_ASSERT_UNLOCKED(_part) \ + mtx_assert(&(_part)->part_mtx, MA_NOTOWNED); static int mmcsd_probe(device_t dev) @@ -159,10 +220,9 @@ mmcsd_probe(device_t dev) #ifdef __rtems__ static rtems_status_code -rtems_bsd_mmcsd_set_block_size(struct mmcsd_softc *self, uint32_t block_size) +rtems_bsd_mmcsd_set_block_size(device_t dev, uint32_t block_size) { rtems_status_code status_code = RTEMS_SUCCESSFUL; - device_t dev = self->dev; struct mmc_command cmd; struct mmc_request req; @@ -183,10 +243,11 @@ rtems_bsd_mmcsd_set_block_size(struct mmcsd_softc *self, uint32_t block_size) } static int -rtems_bsd_mmcsd_disk_read_write(struct mmcsd_softc *self, rtems_blkdev_request *blkreq) +rtems_bsd_mmcsd_disk_read_write(struct mmcsd_part *part, rtems_blkdev_request *blkreq) { rtems_status_code status_code = RTEMS_SUCCESSFUL; - device_t dev = self->dev; + struct mmcsd_softc *sc = part->sc; + device_t dev = sc->dev; int shift = mmc_get_high_cap(dev) ? 
0 : 9; int rca = mmc_get_rca(dev); uint32_t buffer_count = blkreq->bufnum; @@ -216,7 +277,7 @@ rtems_bsd_mmcsd_disk_read_write(struct mmcsd_softc *self, rtems_blkdev_request * data_flags = MMC_DATA_READ; } - MMCSD_LOCK(self); + MMCSD_PART_LOCK(part); for (i = 0; i < buffer_count; ++i) { rtems_blkdev_sg_buffer *sg = &blkreq->bufs [i]; @@ -293,7 +354,7 @@ rtems_bsd_mmcsd_disk_read_write(struct mmcsd_softc *self, rtems_blkdev_request * error: - MMCSD_UNLOCK(self); + MMCSD_PART_UNLOCK(part); rtems_blkdev_request_done(blkreq, status_code); @@ -303,12 +364,12 @@ error: static int rtems_bsd_mmcsd_disk_ioctl(rtems_disk_device *dd, uint32_t req, void *arg) { - struct mmcsd_softc *self = rtems_disk_get_driver_data(dd); if (req == RTEMS_BLKIO_REQUEST) { + struct mmcsd_part *part = rtems_disk_get_driver_data(dd); rtems_blkdev_request *blkreq = arg; - return rtems_bsd_mmcsd_disk_read_write(self, blkreq); + return rtems_bsd_mmcsd_disk_read_write(part, blkreq); } else if (req == RTEMS_BLKIO_CAPABILITIES) { *(uint32_t *) arg = RTEMS_BLKDEV_CAP_MULTISECTOR_CONT; return 0; @@ -321,11 +382,12 @@ static rtems_status_code rtems_bsd_mmcsd_attach_worker(rtems_media_state state, const char *src, char **dest, void *arg) { rtems_status_code status_code = RTEMS_SUCCESSFUL; - struct mmcsd_softc *self = arg; + struct mmcsd_part *part = arg; char *disk = NULL; if (state == RTEMS_MEDIA_STATE_READY) { - device_t dev = self->dev; + struct mmcsd_softc *sc = part->sc; + device_t dev = sc->dev; uint32_t block_count = mmc_get_media_size(dev); uint32_t block_size = MMC_SECTOR_SIZE; @@ -337,14 +399,14 @@ rtems_bsd_mmcsd_attach_worker(rtems_media_state state, const char *src, char **d MMCBUS_ACQUIRE_BUS(device_get_parent(dev), dev); - status_code = rtems_bsd_mmcsd_set_block_size(self, block_size); + status_code = rtems_bsd_mmcsd_set_block_size(dev, block_size); if (status_code != RTEMS_SUCCESSFUL) { printf("OOPS: set block size failed\n"); goto error; } status_code = rtems_blkdev_create(disk, block_size, 
- block_count, rtems_bsd_mmcsd_disk_ioctl, self); + block_count, rtems_bsd_mmcsd_disk_ioctl, part); if (status_code != RTEMS_SUCCESSFUL) { goto error; } @@ -363,128 +425,412 @@ error: static int mmcsd_attach(device_t dev) { + device_t mmcbr; struct mmcsd_softc *sc; -#ifndef __rtems__ - struct disk *d; -#else /* __rtems__ */ - struct { - char d_ident[16]; - char d_descr[64]; - } x, *d = &x; -#endif /* __rtems__ */ - intmax_t mb; - uint32_t speed; - uint32_t maxblocks; - char unit; + const uint8_t *ext_csd; + off_t erase_size, sector_size, size, wp_size; + uintmax_t bytes; + int err, i; + uint8_t rev; + bool comp, ro; + char unit[2]; sc = device_get_softc(dev); sc->dev = dev; - MMCSD_LOCK_INIT(sc); + sc->mmcbr = mmcbr = device_get_parent(dev); + sc->mode = mmcbr_get_mode(mmcbr); + sc->rca = mmc_get_rca(dev); -#ifndef __rtems__ - d = sc->disk = disk_alloc(); - d->d_open = mmcsd_open; - d->d_close = mmcsd_close; - d->d_strategy = mmcsd_strategy; - d->d_dump = mmcsd_dump; - d->d_name = "mmcsd"; - d->d_drv1 = sc; - d->d_sectorsize = mmc_get_sector_size(dev); - d->d_maxsize = mmc_get_max_data(dev) * d->d_sectorsize; - d->d_mediasize = (off_t)mmc_get_media_size(dev) * d->d_sectorsize; - d->d_stripesize = mmc_get_erase_sector(dev) * d->d_sectorsize; - d->d_unit = device_get_unit(dev); - d->d_flags = DISKFLAG_CANDELETE; - d->d_delmaxsize = mmc_get_erase_sector(dev) * d->d_sectorsize; -#endif /* __rtems__ */ - strlcpy(d->d_ident, mmc_get_card_sn_string(dev), sizeof(d->d_ident)); - strlcpy(d->d_descr, mmc_get_card_id_string(dev), sizeof(d->d_descr)); + /* Only MMC >= 4.x devices support EXT_CSD. */ + if (mmc_get_spec_vers(dev) >= 4) { + MMCBUS_ACQUIRE_BUS(mmcbr, dev); + err = mmc_send_ext_csd(mmcbr, dev, sc->ext_csd); + MMCBUS_RELEASE_BUS(mmcbr, dev); + if (err != MMC_ERR_NONE) + bzero(sc->ext_csd, sizeof(sc->ext_csd)); + } + ext_csd = sc->ext_csd; -#ifndef __rtems__ /* - * Display in most natural units. There's no cards < 1MB. 
The SD - * standard goes to 2GiB due to its reliance on FAT, but the data - * format supports up to 4GiB and some card makers push it up to this - * limit. The SDHC standard only goes to 32GiB due to FAT32, but the - * data format supports up to 2TiB however. 2048GB isn't too ugly, so - * we note it in passing here and don't add the code to print - * TB). Since these cards are sold in terms of MB and GB not MiB and - * GiB, report them like that. We also round to the nearest unit, since - * many cards are a few percent short, even of the power of 10 size. + * Enhanced user data area and general purpose partitions are only + * supported in revision 1.4 (EXT_CSD_REV == 4) and later, the RPMB + * partition in revision 1.5 (MMC v4.41, EXT_CSD_REV == 5) and later. */ - mb = (d->d_mediasize + 1000000 / 2 - 1) / 1000000; -#else /* __rtems__ */ - mb = mmc_get_media_size(dev); - mb *= mmc_get_sector_size(dev); - mb = (mb + 1000000 / 2 - 1) / 1000000; -#endif /* __rtems__ */ - unit = 'M'; - if (mb >= 1000) { - unit = 'G'; - mb = (mb + 1000 / 2 - 1) / 1000; + rev = ext_csd[EXT_CSD_REV]; + + /* + * Ignore user-creatable enhanced user data area and general purpose + * partitions as long as partitioning hasn't been finished. + */ + comp = (ext_csd[EXT_CSD_PART_SET] & EXT_CSD_PART_SET_COMPLETED) != 0; + + /* + * Add enhanced user data area slice, unless it spans the entirety of + * the user data area. The enhanced area is of a multiple of high + * capacity write protect groups ((ERASE_GRP_SIZE + HC_WP_GRP_SIZE) * + * 512 KB) and its offset given in either sectors or bytes, depending + * on whether it's a high capacity device or not. + * NB: The slicer and its slices need to be registered before adding + * the disk for the corresponding user data area as re-tasting is + * racy.
+ */ + sector_size = mmc_get_sector_size(dev); + size = ext_csd[EXT_CSD_ENH_SIZE_MULT] + + (ext_csd[EXT_CSD_ENH_SIZE_MULT + 1] << 8) + + (ext_csd[EXT_CSD_ENH_SIZE_MULT + 2] << 16); + if (rev >= 4 && comp == TRUE && size > 0 && + (ext_csd[EXT_CSD_PART_SUPPORT] & + EXT_CSD_PART_SUPPORT_ENH_ATTR_EN) != 0 && + (ext_csd[EXT_CSD_PART_ATTR] & (EXT_CSD_PART_ATTR_ENH_USR)) != 0) { + erase_size = ext_csd[EXT_CSD_ERASE_GRP_SIZE] * 1024 * + MMC_SECTOR_SIZE; + wp_size = ext_csd[EXT_CSD_HC_WP_GRP_SIZE]; + size *= erase_size * wp_size; + if (size != mmc_get_media_size(dev) * sector_size) { + sc->enh_size = size; + sc->enh_base = (ext_csd[EXT_CSD_ENH_START_ADDR] + + (ext_csd[EXT_CSD_ENH_START_ADDR + 1] << 8) + + (ext_csd[EXT_CSD_ENH_START_ADDR + 2] << 16) + + (ext_csd[EXT_CSD_ENH_START_ADDR + 3] << 24)) * + (mmc_get_high_cap(dev) ? MMC_SECTOR_SIZE : 1); + } else if (bootverbose) + device_printf(dev, + "enhanced user data area spans entire device\n"); } + + /* + * Add default partition. This may be the only one or the user + * data area in case partitions are supported. + */ + ro = mmc_get_read_only(dev); + mmcsd_add_part(sc, EXT_CSD_PART_CONFIG_ACC_DEFAULT, "mmcsd", + device_get_unit(dev), mmc_get_media_size(dev) * sector_size, + mmc_get_erase_sector(dev) * sector_size, ro); + + if (mmc_get_spec_vers(dev) < 3) + return (0); + + /* Belatedly announce enhanced user data slice. */ + if (sc->enh_size != 0) { + bytes = mmcsd_pretty_size(size, unit); + printf(FLASH_SLICES_FMT ": %ju%sB enhanced user data area " + "slice offset 0x%jx at %s\n", device_get_nameunit(dev), + MMCSD_LABEL_ENH, bytes, unit, (uintmax_t)sc->enh_base, + device_get_nameunit(dev)); + } + + /* + * Determine partition switch timeout (provided in units of 10 ms) + * and ensure it's at least 300 ms as some eMMC chips lie. + */ + sc->part_time = max(ext_csd[EXT_CSD_PART_SWITCH_TO] * 10 * 1000, + 300 * 1000); + + /* Add boot partitions, which are of a fixed multiple of 128 KB. 
*/ + size = ext_csd[EXT_CSD_BOOT_SIZE_MULT] * MMC_BOOT_RPMB_BLOCK_SIZE; + if (size > 0 && (mmcbr_get_caps(mmcbr) & MMC_CAP_BOOT_NOACC) == 0) { + mmcsd_add_part(sc, EXT_CSD_PART_CONFIG_ACC_BOOT0, + MMCSD_FMT_BOOT, 0, size, MMC_BOOT_RPMB_BLOCK_SIZE, + ro | ((ext_csd[EXT_CSD_BOOT_WP_STATUS] & + EXT_CSD_BOOT_WP_STATUS_BOOT0_MASK) != 0)); + mmcsd_add_part(sc, EXT_CSD_PART_CONFIG_ACC_BOOT1, + MMCSD_FMT_BOOT, 1, size, MMC_BOOT_RPMB_BLOCK_SIZE, + ro | ((ext_csd[EXT_CSD_BOOT_WP_STATUS] & + EXT_CSD_BOOT_WP_STATUS_BOOT1_MASK) != 0)); + } + + /* Add RPMB partition, which also is of a fixed multiple of 128 KB. */ + size = ext_csd[EXT_CSD_RPMB_MULT] * MMC_BOOT_RPMB_BLOCK_SIZE; + if (rev >= 5 && size > 0) + mmcsd_add_part(sc, EXT_CSD_PART_CONFIG_ACC_RPMB, + MMCSD_FMT_RPMB, 0, size, MMC_BOOT_RPMB_BLOCK_SIZE, ro); + + if (rev <= 3 || comp == FALSE) + return (0); + + /* + * Add general purpose partitions, which are of a multiple of high + * capacity write protect groups, too. + */ + if ((ext_csd[EXT_CSD_PART_SUPPORT] & EXT_CSD_PART_SUPPORT_EN) != 0) { + erase_size = ext_csd[EXT_CSD_ERASE_GRP_SIZE] * 1024 * + MMC_SECTOR_SIZE; + wp_size = ext_csd[EXT_CSD_HC_WP_GRP_SIZE]; + for (i = 0; i < MMC_PART_GP_MAX; i++) { + size = ext_csd[EXT_CSD_GP_SIZE_MULT + i * 3] + + (ext_csd[EXT_CSD_GP_SIZE_MULT + i * 3 + 1] << 8) + + (ext_csd[EXT_CSD_GP_SIZE_MULT + i * 3 + 2] << 16); + if (size == 0) + continue; + mmcsd_add_part(sc, EXT_CSD_PART_CONFIG_ACC_GP0 + i, + MMCSD_FMT_GP, i, size * erase_size * wp_size, + erase_size, ro); + } + } + return (0); +} + +static uintmax_t +mmcsd_pretty_size(off_t size, char *unit) +{ + uintmax_t bytes; + int i; + /* - * Report the clock speed of the underlying hardware, which might be - * different than what the card reports due to hardware limitations. - * Report how many blocks the hardware transfers at once. + * Display in most natural units. There's no card < 1MB. However, + * RPMB partitions occasionally are smaller than that, though. 
The + * SD standard goes to 2 GiB due to its reliance on FAT, but the data + * format supports up to 4 GiB and some card makers push it up to this + * limit. The SDHC standard only goes to 32 GiB due to FAT32, but the + * data format supports up to 2 TiB however. 2048 GB isn't too ugly, + * so we note it in passing here and don't add the code to print TB). + * Since these cards are sold in terms of MB and GB not MiB and GiB, + * report them like that. We also round to the nearest unit, since + * many cards are a few percent short, even of the power of 10 size. */ - speed = mmcbr_get_clock(device_get_parent(dev)); - maxblocks = mmc_get_max_data(dev); - device_printf(dev, "%ju%cB <%s>%s at %s %d.%01dMHz/%dbit/%d-block\n", - mb, unit, d->d_descr, - mmc_get_read_only(dev) ? " (read-only)" : "", - device_get_nameunit(device_get_parent(dev)), - speed / 1000000, (speed / 100000) % 10, - mmcsd_bus_bit_width(dev), maxblocks); + bytes = size; + unit[0] = unit[1] = '\0'; + for (i = 0; i <= 2 && bytes >= 1000; i++) { + bytes = (bytes + 1000 / 2 - 1) / 1000; + switch (i) { + case 0: + unit[0] = 'k'; + break; + case 1: + unit[0] = 'M'; + break; + case 2: + unit[0] = 'G'; + break; + default: + break; + } + } + return (bytes); +} + +static struct cdevsw mmcsd_rpmb_cdevsw = { + .d_version = D_VERSION, + .d_name = "mmcsdrpmb", + .d_ioctl = mmcsd_ioctl_rpmb +}; + +static void +mmcsd_add_part(struct mmcsd_softc *sc, u_int type, const char *name, u_int cnt, + off_t media_size, off_t erase_size, bool ro) +{ + struct make_dev_args args; + device_t dev, mmcbr; + const char *ext; + const uint8_t *ext_csd; + struct mmcsd_part *part; #ifndef __rtems__ - disk_create(d, DISK_VERSION); - bioq_init(&sc->bio_queue); - - sc->running = 1; - sc->suspend = 0; - sc->eblock = sc->eend = 0; - kproc_create(&mmcsd_task, sc, &sc->p, 0, 0, "%s: mmc/sd card", - device_get_nameunit(dev)); + struct disk *d; +#endif /* __rtems__ */ + uintmax_t bytes; + u_int gp; + uint32_t speed; + uint8_t extattr; + bool enh; 
+ char unit[2]; + + dev = sc->dev; + mmcbr = sc->mmcbr; + part = sc->part[type] = malloc(sizeof(*part), M_DEVBUF, + M_WAITOK | M_ZERO); + part->sc = sc; + part->cnt = cnt; + part->type = type; + part->ro = ro; + snprintf(part->name, sizeof(part->name), name, device_get_unit(dev)); + + /* For the RPMB partition, allow IOCTL access only. */ + if (type == EXT_CSD_PART_CONFIG_ACC_RPMB) { + make_dev_args_init(&args); + args.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK; + args.mda_devsw = &mmcsd_rpmb_cdevsw; + args.mda_uid = UID_ROOT; + args.mda_gid = GID_OPERATOR; + args.mda_mode = 0640; + args.mda_si_drv1 = part; + if (make_dev_s(&args, &sc->rpmb_dev, "%s", part->name) != 0) { + device_printf(dev, "Failed to make RPMB device\n"); + free(part, M_DEVBUF); + return; + } + } else { + MMCSD_PART_LOCK_INIT(part); + +#ifndef __rtems__ + d = part->disk = disk_alloc(); + d->d_open = mmcsd_open; + d->d_close = mmcsd_close; + d->d_strategy = mmcsd_strategy; + d->d_ioctl = mmcsd_ioctl_disk; + d->d_dump = mmcsd_dump; + d->d_getattr = mmcsd_getattr; + d->d_name = part->name; + d->d_drv1 = part; + d->d_sectorsize = mmc_get_sector_size(dev); + d->d_maxsize = mmc_get_max_data(dev) * d->d_sectorsize; + d->d_mediasize = media_size; + d->d_stripesize = erase_size; + d->d_unit = cnt; + d->d_flags = DISKFLAG_CANDELETE; + d->d_delmaxsize = erase_size; + strlcpy(d->d_ident, mmc_get_card_sn_string(dev), + sizeof(d->d_ident)); + strlcpy(d->d_descr, mmc_get_card_id_string(dev), + sizeof(d->d_descr)); + d->d_rotation_rate = DISK_RR_NON_ROTATING; + + disk_create(d, DISK_VERSION); + bioq_init(&part->bio_queue); + + part->running = 1; + kproc_create(&mmcsd_task, part, &part->p, 0, 0, + "%s%d: mmc/sd card", part->name, cnt); #else /* __rtems__ */ - rtems_status_code status_code = rtems_media_server_disk_attach( - device_get_name(dev), - rtems_bsd_mmcsd_attach_worker, - sc - ); - BSD_ASSERT(status_code == RTEMS_SUCCESSFUL); + rtems_status_code status_code = rtems_media_server_disk_attach( + 
part->name, rtems_bsd_mmcsd_attach_worker, part); + BSD_ASSERT(status_code == RTEMS_SUCCESSFUL); #endif /* __rtems__ */ + } + + bytes = mmcsd_pretty_size(media_size, unit); + if (type == EXT_CSD_PART_CONFIG_ACC_DEFAULT) { + speed = mmcbr_get_clock(mmcbr); + printf("%s%d: %ju%sB <%s>%s at %s %d.%01dMHz/%dbit/%d-block\n", + part->name, cnt, bytes, unit, mmc_get_card_id_string(dev), + ro ? " (read-only)" : "", device_get_nameunit(mmcbr), + speed / 1000000, (speed / 100000) % 10, + mmcsd_bus_bit_width(dev), mmc_get_max_data(dev)); + } else if (type == EXT_CSD_PART_CONFIG_ACC_RPMB) { + printf("%s: %ju%sB partion %d%s at %s\n", part->name, bytes, + unit, type, ro ? " (read-only)" : "", + device_get_nameunit(dev)); + } else { + enh = false; + ext = NULL; + extattr = 0; + if (type >= EXT_CSD_PART_CONFIG_ACC_GP0 && + type <= EXT_CSD_PART_CONFIG_ACC_GP3) { + ext_csd = sc->ext_csd; + gp = type - EXT_CSD_PART_CONFIG_ACC_GP0; + if ((ext_csd[EXT_CSD_PART_SUPPORT] & + EXT_CSD_PART_SUPPORT_ENH_ATTR_EN) != 0 && + (ext_csd[EXT_CSD_PART_ATTR] & + (EXT_CSD_PART_ATTR_ENH_GP0 << gp)) != 0) + enh = true; + else if ((ext_csd[EXT_CSD_PART_SUPPORT] & + EXT_CSD_PART_SUPPORT_EXT_ATTR_EN) != 0) { + extattr = (ext_csd[EXT_CSD_EXT_PART_ATTR + + (gp / 2)] >> (4 * (gp % 2))) & 0xF; + switch (extattr) { + case EXT_CSD_EXT_PART_ATTR_DEFAULT: + break; + case EXT_CSD_EXT_PART_ATTR_SYSTEMCODE: + ext = "system code"; + break; + case EXT_CSD_EXT_PART_ATTR_NPERSISTENT: + ext = "non-persistent"; + break; + default: + ext = "reserved"; + break; + } + } + } + if (ext == NULL) + printf("%s%d: %ju%sB partion %d%s%s at %s\n", + part->name, cnt, bytes, unit, type, enh ? + " enhanced" : "", ro ? " (read-only)" : "", + device_get_nameunit(dev)); + else + printf("%s%d: %ju%sB partion %d extended 0x%x " + "(%s)%s at %s\n", part->name, cnt, bytes, unit, + type, extattr, ext, ro ? 
" (read-only)" : "", + device_get_nameunit(dev)); + } +} +#ifndef __rtems__ +static int +mmcsd_slicer(device_t dev, const char *provider, + struct flash_slice *slices, int *nslices) +{ + char name[MMCSD_PART_NAMELEN]; + struct mmcsd_softc *sc; + struct mmcsd_part *part; + + *nslices = 0; + if (slices == NULL) + return (ENOMEM); + + sc = device_get_softc(dev); + if (sc->enh_size == 0) + return (ENXIO); + + part = sc->part[EXT_CSD_PART_CONFIG_ACC_DEFAULT]; + snprintf(name, sizeof(name), "%s%d", part->disk->d_name, + part->disk->d_unit); + if (strcmp(name, provider) != 0) + return (ENXIO); + + *nslices = 1; + slices[0].base = sc->enh_base; + slices[0].size = sc->enh_size; + slices[0].label = MMCSD_LABEL_ENH; return (0); } +#endif /* __rtems__ */ static int mmcsd_detach(device_t dev) { +#ifndef __rtems__ struct mmcsd_softc *sc = device_get_softc(dev); + struct mmcsd_part *part; + int i; + + for (i = 0; i < MMC_PART_MAX; i++) { + part = sc->part[i]; + if (part != NULL && part->disk != NULL) { + MMCSD_PART_LOCK(part); + part->suspend = 0; + if (part->running > 0) { + /* kill thread */ + part->running = 0; + wakeup(part); + /* wait for thread to finish. */ + while (part->running != -1) + msleep(part, &part->part_mtx, 0, + "detach", 0); + } + MMCSD_PART_UNLOCK(part); + } + } -#ifndef __rtems__ - MMCSD_LOCK(sc); - sc->suspend = 0; - if (sc->running > 0) { - /* kill thread */ - sc->running = 0; - wakeup(sc); - /* wait for thread to finish. */ - while (sc->running != -1) - msleep(sc, &sc->sc_mtx, 0, "detach", 0); - } - MMCSD_UNLOCK(sc); - - /* Flush the request queue. */ - bioq_flush(&sc->bio_queue, NULL, ENXIO); - /* kill disk */ - disk_destroy(sc->disk); + if (sc->rpmb_dev != NULL) + destroy_dev(sc->rpmb_dev); + + for (i = 0; i < MMC_PART_MAX; i++) { + part = sc->part[i]; + if (part != NULL) { + if (part->disk != NULL) { + /* Flush the request queue. 
*/ + bioq_flush(&part->bio_queue, NULL, ENXIO); + /* kill disk */ + disk_destroy(part->disk); + + MMCSD_PART_LOCK_DESTROY(part); + } + free(part, M_DEVBUF); + } + } #else /* __rtems__ */ BSD_PANIC("FIXME"); #endif /* __rtems__ */ - - MMCSD_LOCK_DESTROY(sc); - return (0); } @@ -493,18 +839,26 @@ mmcsd_suspend(device_t dev) { #ifndef __rtems__ struct mmcsd_softc *sc = device_get_softc(dev); - - MMCSD_LOCK(sc); - sc->suspend = 1; - if (sc->running > 0) { - /* kill thread */ - sc->running = 0; - wakeup(sc); - /* wait for thread to finish. */ - while (sc->running != -1) - msleep(sc, &sc->sc_mtx, 0, "detach", 0); - } - MMCSD_UNLOCK(sc); + struct mmcsd_part *part; + int i; + + for (i = 0; i < MMC_PART_MAX; i++) { + part = sc->part[i]; + if (part != NULL && part->disk != NULL) { + MMCSD_PART_LOCK(part); + part->suspend = 1; + if (part->running > 0) { + /* kill thread */ + part->running = 0; + wakeup(part); + /* wait for thread to finish. */ + while (part->running != -1) + msleep(part, &part->part_mtx, 0, + "detach", 0); + } + MMCSD_PART_UNLOCK(part); + } + } #else /* __rtems__ */ BSD_PANIC("FIXME"); #endif /* __rtems__ */ @@ -516,16 +870,23 @@ mmcsd_resume(device_t dev) { #ifndef __rtems__ struct mmcsd_softc *sc = device_get_softc(dev); - - MMCSD_LOCK(sc); - sc->suspend = 0; - if (sc->running <= 0) { - sc->running = 1; - MMCSD_UNLOCK(sc); - kproc_create(&mmcsd_task, sc, &sc->p, 0, 0, "%s: mmc/sd card", - device_get_nameunit(dev)); - } else - MMCSD_UNLOCK(sc); + struct mmcsd_part *part; + int i; + + for (i = 0; i < MMC_PART_MAX; i++) { + part = sc->part[i]; + if (part != NULL && part->disk != NULL) { + MMCSD_PART_LOCK(part); + part->suspend = 0; + if (part->running <= 0) { + part->running = 1; + MMCSD_PART_UNLOCK(part); /* unlock before creating the thread, as the replaced code did */ + kproc_create(&mmcsd_task, part, &part->p, 0, 0, + "%s%d: mmc/sd card", part->name, part->cnt); + } else + MMCSD_PART_UNLOCK(part); + } + } #else /* __rtems__ */ BSD_PANIC("FIXME"); #endif /* __rtems__ */ @@ -534,14 +895,14 @@ mmcsd_resume(device_t dev) 
#ifndef __rtems__ static int -mmcsd_open(struct disk *dp) +mmcsd_open(struct disk *dp __unused) { return (0); } static int -mmcsd_close(struct disk *dp) +mmcsd_close(struct disk *dp __unused) { return (0); @@ -551,47 +912,339 @@ static void mmcsd_strategy(struct bio *bp) { struct mmcsd_softc *sc; - - sc = (struct mmcsd_softc *)bp->bio_disk->d_drv1; - MMCSD_LOCK(sc); - if (sc->running > 0 || sc->suspend > 0) { - bioq_disksort(&sc->bio_queue, bp); - MMCSD_UNLOCK(sc); - wakeup(sc); + struct mmcsd_part *part; + + part = bp->bio_disk->d_drv1; + sc = part->sc; + MMCSD_PART_LOCK(part); + if (part->running > 0 || part->suspend > 0) { + bioq_disksort(&part->bio_queue, bp); + MMCSD_PART_UNLOCK(part); + wakeup(part); } else { - MMCSD_UNLOCK(sc); + MMCSD_PART_UNLOCK(part); biofinish(bp, NULL, ENXIO); } } +#endif /* __rtems__ */ + +static int +mmcsd_ioctl_rpmb(struct cdev *dev, u_long cmd, caddr_t data, + int fflag, struct thread *td __unused) +{ + + return (mmcsd_ioctl(dev->si_drv1, cmd, data, fflag)); +} + +#ifndef __rtems__ +static int +mmcsd_ioctl_disk(struct disk *disk, u_long cmd, void *data, int fflag, + struct thread *td __unused) +{ + + return (mmcsd_ioctl(disk->d_drv1, cmd, data, fflag)); +} +#endif /* __rtems__ */ + +static int +mmcsd_ioctl(struct mmcsd_part *part, u_long cmd, void *data, int fflag) +{ + struct mmc_ioc_cmd *mic; + struct mmc_ioc_multi_cmd *mimc; + int i, err; + u_long cnt, size; + + if ((fflag & FREAD) == 0) + return (EBADF); + + err = 0; + switch (cmd) { + case MMC_IOC_CMD: + mic = data; + err = mmcsd_ioctl_cmd(part, data, fflag); + break; + case MMC_IOC_CMD_MULTI: + mimc = data; + if (mimc->num_of_cmds == 0) + break; + if (mimc->num_of_cmds > MMC_IOC_MAX_CMDS) + return (EINVAL); + cnt = mimc->num_of_cmds; + size = sizeof(*mic) * cnt; + mic = malloc(size, M_TEMP, M_WAITOK); + err = copyin((const void *)mimc->cmds, mic, size); + if (err == 0) { /* on copyin() failure skip the commands but still free mic */ + for (i = 0; i < cnt; i++) { + err = mmcsd_ioctl_cmd(part, &mic[i], fflag); + if (err != 0) + 
break; + } + } + free(mic, M_TEMP); + break; + default: + return (ENOIOCTL); + } + return (err); +} + +static int +mmcsd_ioctl_cmd(struct mmcsd_part *part, struct mmc_ioc_cmd *mic, int fflag) +{ + struct mmc_command cmd; + struct mmc_data data; + struct mmcsd_softc *sc; + device_t dev, mmcbr; + void *dp; + u_long len; + int err, retries; + uint32_t status; + uint16_t rca; + + if ((fflag & FWRITE) == 0 && mic->write_flag != 0) + return (EBADF); + + if (part->ro == TRUE && mic->write_flag != 0) + return (EROFS); + + err = 0; + dp = NULL; + len = mic->blksz * mic->blocks; + if (len > MMC_IOC_MAX_BYTES) + return (EOVERFLOW); + if (len != 0) { + dp = malloc(len, M_TEMP, M_WAITOK); + err = copyin((void *)(uintptr_t)mic->data_ptr, dp, len); + if (err != 0) + goto out; + } + memset(&cmd, 0, sizeof(cmd)); + memset(&data, 0, sizeof(data)); + cmd.opcode = mic->opcode; + cmd.arg = mic->arg; + cmd.flags = mic->flags; + if (len != 0) { + data.len = len; + data.data = dp; + data.flags = mic->write_flag != 0 ? 
MMC_DATA_WRITE : + MMC_DATA_READ; + cmd.data = &data; + } + sc = part->sc; + rca = sc->rca; + if (mic->is_acmd == 0) { + /* Enforce/patch/restrict RCA-based commands */ + switch (cmd.opcode) { + case MMC_SET_RELATIVE_ADDR: + case MMC_SELECT_CARD: + err = EPERM; + goto out; + case MMC_STOP_TRANSMISSION: + if ((cmd.arg & 0x1) == 0) + break; + /* FALLTHROUGH */ + case MMC_SLEEP_AWAKE: + case MMC_SEND_CSD: + case MMC_SEND_CID: + case MMC_SEND_STATUS: + case MMC_GO_INACTIVE_STATE: + case MMC_FAST_IO: + case MMC_APP_CMD: + cmd.arg = (cmd.arg & 0x0000FFFF) | (rca << 16); + break; + default: + break; + } + } + dev = sc->dev; + mmcbr = sc->mmcbr; + MMCBUS_ACQUIRE_BUS(mmcbr, dev); + err = mmcsd_switch_part(mmcbr, dev, rca, part->type); + if (err != MMC_ERR_NONE) + goto release; + if (part->type == EXT_CSD_PART_CONFIG_ACC_RPMB) { + err = mmcsd_set_blockcount(sc, mic->blocks, + mic->write_flag & (1 << 31)); + if (err != MMC_ERR_NONE) + goto release; + } + if (mic->is_acmd != 0) + (void)mmc_wait_for_app_cmd(mmcbr, dev, rca, &cmd, 0); + else + (void)mmc_wait_for_cmd(mmcbr, dev, &cmd, 0); + if (part->type == EXT_CSD_PART_CONFIG_ACC_RPMB) { + /* + * If the request went to the RPMB partition, try to ensure + * that the command actually has completed ... + */ + retries = MMCSD_CMD_RETRIES; + do { + err = mmc_send_status(mmcbr, dev, rca, &status); + if (err != MMC_ERR_NONE) + break; + if (R1_STATUS(status) == 0 && + R1_CURRENT_STATE(status) != R1_STATE_PRG) + break; + DELAY(1000); + } while (retries-- > 0); + + /* ... and always switch back to the default partition. */ + err = mmcsd_switch_part(mmcbr, dev, rca, + EXT_CSD_PART_CONFIG_ACC_DEFAULT); + if (err != MMC_ERR_NONE) + goto release; + } + /* + * If EXT_CSD was changed, our copy is outdated now. Specifically, + * the upper bits of EXT_CSD_PART_CONFIG used in mmcsd_switch_part(), + * so retrieve EXT_CSD again. 
+ */ + if (cmd.opcode == MMC_SWITCH_FUNC) { + err = mmc_send_ext_csd(mmcbr, dev, sc->ext_csd); + if (err != MMC_ERR_NONE) + goto release; + } + MMCBUS_RELEASE_BUS(mmcbr, dev); + if (cmd.error != MMC_ERR_NONE) { + switch (cmd.error) { + case MMC_ERR_TIMEOUT: + err = ETIMEDOUT; + break; + case MMC_ERR_BADCRC: + err = EILSEQ; + break; + case MMC_ERR_INVALID: + err = EINVAL; + break; + case MMC_ERR_NO_MEMORY: + err = ENOMEM; + break; + default: + err = EIO; + break; + } + goto out; + } + memcpy(mic->response, cmd.resp, 4 * sizeof(uint32_t)); + if (mic->write_flag == 0 && len != 0) { + err = copyout(dp, (void *)(uintptr_t)mic->data_ptr, len); + if (err != 0) + goto out; + } + goto out; + +release: + MMCBUS_RELEASE_BUS(mmcbr, dev); + err = EIO; + +out: + if (dp != NULL) + free(dp, M_TEMP); + return (err); +} + +#ifndef __rtems__ +static int +mmcsd_getattr(struct bio *bp) +{ + struct mmcsd_part *part; + device_t dev; + + if (strcmp(bp->bio_attribute, "MMC::device") == 0) { + if (bp->bio_length != sizeof(dev)) + return (EFAULT); + part = bp->bio_disk->d_drv1; + dev = part->sc->dev; + bcopy(&dev, bp->bio_data, sizeof(dev)); + bp->bio_completed = bp->bio_length; + return (0); + } + return (-1); +} +#endif /* __rtems__ */ + +static int +mmcsd_set_blockcount(struct mmcsd_softc *sc, u_int count, bool reliable) +{ + struct mmc_command cmd; + struct mmc_request req; + + memset(&req, 0, sizeof(req)); + memset(&cmd, 0, sizeof(cmd)); + cmd.mrq = &req; + req.cmd = &cmd; + cmd.opcode = MMC_SET_BLOCK_COUNT; + cmd.arg = count & 0x0000FFFF; + if (reliable) + cmd.arg |= 1 << 31; + cmd.flags = MMC_RSP_R1 | MMC_CMD_AC; + MMCBUS_WAIT_FOR_REQUEST(sc->mmcbr, sc->dev, &req); + return (cmd.error); +} + +static int +mmcsd_switch_part(device_t bus, device_t dev, uint16_t rca, u_int part) +{ + struct mmcsd_softc *sc; + int err; + uint8_t value; + + sc = device_get_softc(dev); + + if (sc->part_curr == part) + return (MMC_ERR_NONE); + + if (sc->mode == mode_sd) + return (MMC_ERR_NONE); + + value = 
(sc->ext_csd[EXT_CSD_PART_CONFIG] & + ~EXT_CSD_PART_CONFIG_ACC_MASK) | part; + /* Jump! */ + err = mmc_switch(bus, dev, rca, EXT_CSD_CMD_SET_NORMAL, + EXT_CSD_PART_CONFIG, value, sc->part_time, true); + if (err != MMC_ERR_NONE) + return (err); + + sc->ext_csd[EXT_CSD_PART_CONFIG] = value; + sc->part_curr = part; + return (MMC_ERR_NONE); +} +#ifndef __rtems__ static const char * mmcsd_errmsg(int e) { + if (e < 0 || e > MMC_ERR_MAX) return "Bad error code"; return errmsg[e]; } static daddr_t -mmcsd_rw(struct mmcsd_softc *sc, struct bio *bp) +mmcsd_rw(struct mmcsd_part *part, struct bio *bp) { daddr_t block, end; struct mmc_command cmd; struct mmc_command stop; struct mmc_request req; struct mmc_data data; - device_t dev = sc->dev; - int sz = sc->disk->d_sectorsize; - device_t mmcbr = device_get_parent(dev); + struct mmcsd_softc *sc; + device_t dev, mmcbr; + int numblocks, sz; + char *vaddr; + + sc = part->sc; + dev = sc->dev; + mmcbr = sc->mmcbr; block = bp->bio_pblkno; + sz = part->disk->d_sectorsize; end = bp->bio_pblkno + (bp->bio_bcount / sz); while (block < end) { - char *vaddr = bp->bio_data + - (block - bp->bio_pblkno) * sz; - int numblocks = min(end - block, mmc_get_max_data(dev)); + vaddr = bp->bio_data + (block - bp->bio_pblkno) * sz; + numblocks = min(end - block, mmc_get_max_data(dev)); memset(&req, 0, sizeof(req)); - memset(&cmd, 0, sizeof(cmd)); + memset(&cmd, 0, sizeof(cmd)); memset(&stop, 0, sizeof(stop)); memset(&data, 0, sizeof(data)); cmd.mrq = &req; @@ -629,10 +1282,11 @@ mmcsd_rw(struct mmcsd_softc *sc, struct bio *bp) } MMCBUS_WAIT_FOR_REQUEST(mmcbr, dev, &req); if (req.cmd->error != MMC_ERR_NONE) { - if (ppsratecheck(&sc->log_time, &sc->log_count, LOG_PPS)) { + if (ppsratecheck(&sc->log_time, &sc->log_count, + LOG_PPS)) device_printf(dev, "Error indicated: %d %s\n", - req.cmd->error, mmcsd_errmsg(req.cmd->error)); - } + req.cmd->error, + mmcsd_errmsg(req.cmd->error)); break; } block += numblocks; @@ -641,33 +1295,37 @@ mmcsd_rw(struct 
mmcsd_softc *sc, struct bio *bp) } static daddr_t -mmcsd_delete(struct mmcsd_softc *sc, struct bio *bp) +mmcsd_delete(struct mmcsd_part *part, struct bio *bp) { daddr_t block, end, start, stop; struct mmc_command cmd; struct mmc_request req; - device_t dev = sc->dev; - int sz = sc->disk->d_sectorsize; - int erase_sector; - device_t mmcbr = device_get_parent(dev); + struct mmcsd_softc *sc; + device_t dev, mmcbr; + int erase_sector, sz; + + sc = part->sc; + dev = sc->dev; + mmcbr = sc->mmcbr; block = bp->bio_pblkno; + sz = part->disk->d_sectorsize; end = bp->bio_pblkno + (bp->bio_bcount / sz); /* Coalesce with part remaining from previous request. */ - if (block > sc->eblock && block <= sc->eend) - block = sc->eblock; - if (end >= sc->eblock && end < sc->eend) - end = sc->eend; + if (block > part->eblock && block <= part->eend) + block = part->eblock; + if (end >= part->eblock && end < part->eend) + end = part->eend; /* Safe round to the erase sector boundaries. */ erase_sector = mmc_get_erase_sector(dev); start = block + erase_sector - 1; /* Round up. */ start -= start % erase_sector; stop = end; /* Round down. */ - stop -= end % erase_sector; - /* We can't erase area smaller then sector, store it for later. */ + stop -= end % erase_sector; + /* We can't erase an area smaller than a sector, store it for later. */ if (start >= stop) { - sc->eblock = block; - sc->eend = end; + part->eblock = block; + part->eend = end; return (end); } @@ -720,40 +1378,54 @@ mmcsd_delete(struct mmcsd_softc *sc, struct bio *bp) return (block); } /* Store one of remaining parts for the next call. */ - if (bp->bio_pblkno >= sc->eblock || block == start) { - sc->eblock = stop; /* Predict next forward. */ - sc->eend = end; + if (bp->bio_pblkno >= part->eblock || block == start) { + part->eblock = stop; /* Predict next forward. */ + part->eend = end; } else { - sc->eblock = block; /* Predict next backward. */ - sc->eend = start; + part->eblock = block; /* Predict next backward. 
*/ + part->eend = start; } return (end); } static int -mmcsd_dump(void *arg, void *virtual, vm_offset_t physical, - off_t offset, size_t length) +mmcsd_dump(void *arg, void *virtual, vm_offset_t physical, off_t offset, + size_t length) { - struct disk *disk = arg; - struct mmcsd_softc *sc = (struct mmcsd_softc *)disk->d_drv1; - device_t dev = sc->dev; struct bio bp; daddr_t block, end; - device_t mmcbr = device_get_parent(dev); + struct disk *disk; + struct mmcsd_softc *sc; + struct mmcsd_part *part; + device_t dev, mmcbr; + int err; /* length zero is special and really means flush buffers to media */ if (!length) return (0); + disk = arg; + part = disk->d_drv1; + sc = part->sc; + dev = sc->dev; + mmcbr = sc->mmcbr; + g_reset_bio(&bp); bp.bio_disk = disk; bp.bio_pblkno = offset / disk->d_sectorsize; bp.bio_bcount = length; bp.bio_data = virtual; bp.bio_cmd = BIO_WRITE; - end = bp.bio_pblkno + bp.bio_bcount / sc->disk->d_sectorsize; + end = bp.bio_pblkno + bp.bio_bcount / disk->d_sectorsize; MMCBUS_ACQUIRE_BUS(mmcbr, dev); - block = mmcsd_rw(sc, &bp); + err = mmcsd_switch_part(mmcbr, dev, sc->rca, part->type); + if (err != MMC_ERR_NONE) { + if (ppsratecheck(&sc->log_time, &sc->log_count, LOG_PPS)) + device_printf(dev, "Partition switch error\n"); + MMCBUS_RELEASE_BUS(mmcbr, dev); + return (EIO); + } + block = mmcsd_rw(part, &bp); MMCBUS_RELEASE_BUS(mmcbr, dev); return ((end < block) ? 
EIO : 0); } @@ -761,24 +1433,30 @@ mmcsd_dump(void *arg, void *virtual, vm_offset_t physical, static void mmcsd_task(void *arg) { - struct mmcsd_softc *sc = (struct mmcsd_softc*)arg; - struct bio *bp; - int sz; daddr_t block, end; - device_t dev = sc->dev; - device_t mmcbr = device_get_parent(sc->dev); + struct mmcsd_part *part; + struct mmcsd_softc *sc; + struct bio *bp; + device_t dev, mmcbr; + int err, sz; + + part = arg; + sc = part->sc; + dev = sc->dev; + mmcbr = sc->mmcbr; while (1) { - MMCSD_LOCK(sc); + MMCSD_PART_LOCK(part); do { - if (sc->running == 0) + if (part->running == 0) goto out; - bp = bioq_takefirst(&sc->bio_queue); + bp = bioq_takefirst(&part->bio_queue); if (bp == NULL) - msleep(sc, &sc->sc_mtx, PRIBIO, "jobqueue", 0); + msleep(part, &part->part_mtx, PRIBIO, + "jobqueue", 0); } while (bp == NULL); - MMCSD_UNLOCK(sc); - if (bp->bio_cmd != BIO_READ && mmc_get_read_only(dev)) { + MMCSD_PART_UNLOCK(part); + if (bp->bio_cmd != BIO_READ && part->ro) { bp->bio_error = EROFS; bp->bio_resid = bp->bio_bcount; bp->bio_flags |= BIO_ERROR; @@ -786,30 +1464,40 @@ mmcsd_task(void *arg) continue; } MMCBUS_ACQUIRE_BUS(mmcbr, dev); - sz = sc->disk->d_sectorsize; + sz = part->disk->d_sectorsize; block = bp->bio_pblkno; end = bp->bio_pblkno + (bp->bio_bcount / sz); + err = mmcsd_switch_part(mmcbr, dev, sc->rca, part->type); + if (err != MMC_ERR_NONE) { + if (ppsratecheck(&sc->log_time, &sc->log_count, + LOG_PPS)) + device_printf(dev, "Partition switch error\n"); + goto release; + } if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) { /* Access to the remaining erase block obsoletes it. 
*/ - if (block < sc->eend && end > sc->eblock) - sc->eblock = sc->eend = 0; - block = mmcsd_rw(sc, bp); + if (block < part->eend && end > part->eblock) + part->eblock = part->eend = 0; + block = mmcsd_rw(part, bp); } else if (bp->bio_cmd == BIO_DELETE) { - block = mmcsd_delete(sc, bp); + block = mmcsd_delete(part, bp); } +release: MMCBUS_RELEASE_BUS(mmcbr, dev); if (block < end) { bp->bio_error = EIO; bp->bio_resid = (end - block) * sz; bp->bio_flags |= BIO_ERROR; + } else { + bp->bio_resid = 0; } biodone(bp); } out: /* tell parent we're done */ - sc->running = -1; - MMCSD_UNLOCK(sc); - wakeup(sc); + part->running = -1; + MMCSD_PART_UNLOCK(part); + wakeup(part); kproc_exit(0); } @@ -842,4 +1530,24 @@ static driver_t mmcsd_driver = { }; static devclass_t mmcsd_devclass; -DRIVER_MODULE(mmcsd, mmc, mmcsd_driver, mmcsd_devclass, NULL, NULL); +static int +mmcsd_handler(module_t mod __unused, int what, void *arg __unused) +{ + +#ifndef __rtems__ + switch (what) { + case MOD_LOAD: + flash_register_slicer(mmcsd_slicer, FLASH_SLICES_TYPE_MMC, + TRUE); + return (0); + case MOD_UNLOAD: + flash_register_slicer(NULL, FLASH_SLICES_TYPE_MMC, TRUE); + return (0); + } +#endif /* __rtems__ */ + return (0); +} + +DRIVER_MODULE(mmcsd, mmc, mmcsd_driver, mmcsd_devclass, mmcsd_handler, NULL); +MODULE_DEPEND(mmcsd, g_flashmap, 0, 0, 0); +MMC_DEPEND(mmcsd); diff --git a/freebsd/sys/dev/mmc/mmcvar.h b/freebsd/sys/dev/mmc/mmcvar.h index c7a4af99..9f62b112 100644 --- a/freebsd/sys/dev/mmc/mmcvar.h +++ b/freebsd/sys/dev/mmc/mmcvar.h @@ -49,15 +49,14 @@ * or the SD Card Association to disclose or distribute any technical * information, know-how or other confidential information to any third party. 
* - * "$FreeBSD$" + * $FreeBSD$ */ #ifndef DEV_MMC_MMCVAR_H #define DEV_MMC_MMCVAR_H -#include - enum mmc_device_ivars { + MMC_IVAR_SPEC_VERS, MMC_IVAR_DSR_IMP, MMC_IVAR_MEDIA_SIZE, MMC_IVAR_RCA, @@ -79,6 +78,7 @@ enum mmc_device_ivars { #define MMC_ACCESSOR(var, ivar, type) \ __BUS_ACCESSOR(mmc, var, MMC, ivar, type) +MMC_ACCESSOR(spec_vers, SPEC_VERS, uint8_t) MMC_ACCESSOR(dsr_imp, DSR_IMP, int) MMC_ACCESSOR(media_size, MEDIA_SIZE, long) MMC_ACCESSOR(rca, RCA, int) diff --git a/freebsd/sys/dev/nvme/nvme.h b/freebsd/sys/dev/nvme/nvme.h index 9c1dab17..1bad4392 100644 --- a/freebsd/sys/dev/nvme/nvme.h +++ b/freebsd/sys/dev/nvme/nvme.h @@ -955,7 +955,8 @@ void nvme_ns_rw_cmd(struct nvme_command *cmd, uint32_t rwcmd, uint16_t nsid, { cmd->opc = rwcmd; cmd->nsid = nsid; - *(uint64_t *)&cmd->cdw10 = lba; + cmd->cdw10 = lba & 0xffffffffu; + cmd->cdw11 = lba >> 32; cmd->cdw12 = count-1; cmd->cdw13 = 0; cmd->cdw14 = 0; diff --git a/freebsd/sys/dev/ofw/ofw_bus_subr.c b/freebsd/sys/dev/ofw/ofw_bus_subr.c index 79852afc..a067b949 100644 --- a/freebsd/sys/dev/ofw/ofw_bus_subr.c +++ b/freebsd/sys/dev/ofw/ofw_bus_subr.c @@ -48,6 +48,7 @@ __FBSDID("$FreeBSD$"); #include #define OFW_COMPAT_LEN 255 +#define OFW_STATUS_LEN 16 int ofw_bus_gen_setup_devinfo(struct ofw_bus_devinfo *obd, phandle_t node) @@ -181,6 +182,24 @@ ofw_bus_status_okay(device_t dev) return (0); } +int +ofw_bus_node_status_okay(phandle_t node) +{ + char status[OFW_STATUS_LEN]; + int len; + + len = OF_getproplen(node, "status"); + if (len <= 0) + return (1); + + OF_getprop(node, "status", status, OFW_STATUS_LEN); + if ((len == 5 && (bcmp(status, "okay", len) == 0)) || + (len == 3 && (bcmp(status, "ok", len) == 0))) /* bcmp() returns 0 on a match */ + return (1); + + return (0); +} + static int ofw_bus_node_is_compatible_int(const char *compat, int len, const char *onecompat) @@ -946,7 +965,7 @@ ofw_bus_string_list_to_array(phandle_t node, const char *list_name, i += len; tptr += len; } - array[cnt] = 0; + array[cnt] = NULL; *out_array = array; return 
(cnt); diff --git a/freebsd/sys/dev/ofw/ofw_bus_subr.h b/freebsd/sys/dev/ofw/ofw_bus_subr.h index 30f299a6..4afd84e3 100644 --- a/freebsd/sys/dev/ofw/ofw_bus_subr.h +++ b/freebsd/sys/dev/ofw/ofw_bus_subr.h @@ -100,6 +100,7 @@ int ofw_bus_intr_by_rid(device_t, phandle_t, int, phandle_t *, int *, /* Helper to get device status property */ const char *ofw_bus_get_status(device_t dev); int ofw_bus_status_okay(device_t dev); +int ofw_bus_node_status_okay(phandle_t node); /* Helper to get node's interrupt parent */ phandle_t ofw_bus_find_iparent(phandle_t); diff --git a/freebsd/sys/dev/ofw/ofw_fdt.c b/freebsd/sys/dev/ofw/ofw_fdt.c index ae3da8e4..20e07e90 100644 --- a/freebsd/sys/dev/ofw/ofw_fdt.c +++ b/freebsd/sys/dev/ofw/ofw_fdt.c @@ -432,6 +432,7 @@ ofw_fdt_package_to_path(ofw_t ofw, phandle_t package, char *buf, size_t len) return (-1); } +#ifndef __rtems__ #if defined(FDT_MARVELL) || defined(__powerpc__) static int ofw_fdt_fixup(ofw_t ofw) @@ -467,11 +468,13 @@ ofw_fdt_fixup(ofw_t ofw) return (0); } #endif +#endif /* __rtems__ */ static int ofw_fdt_interpret(ofw_t ofw, const char *cmd, int nret, cell_t *retvals) { #if defined(FDT_MARVELL) || defined(__powerpc__) +#ifndef __rtems__ int rv; /* @@ -490,6 +493,9 @@ ofw_fdt_interpret(ofw_t ofw, const char *cmd, int nret, cell_t *retvals) retvals[0] = rv; return (rv); +#else /* __rtems__ */ + return (0); +#endif /* __rtems__ */ #else return (0); #endif diff --git a/freebsd/sys/dev/pci/pci.c b/freebsd/sys/dev/pci/pci.c index 2eba4ca2..3209f893 100644 --- a/freebsd/sys/dev/pci/pci.c +++ b/freebsd/sys/dev/pci/pci.c @@ -283,13 +283,14 @@ static const struct pci_quirk pci_quirks[] = { { 0x43851002, PCI_QUIRK_UNMAP_REG, 0x14, 0 }, /* - * Atheros AR8161/AR8162/E2200/E2400 Ethernet controllers have a - * bug that MSI interrupt does not assert if PCIM_CMD_INTxDIS bit + * Atheros AR8161/AR8162/E2200/E2400/E2500 Ethernet controllers have + * a bug that MSI interrupt does not assert if PCIM_CMD_INTxDIS bit * of the command register 
is set. */ { 0x10911969, PCI_QUIRK_MSI_INTX_BUG, 0, 0 }, { 0xE0911969, PCI_QUIRK_MSI_INTX_BUG, 0, 0 }, { 0xE0A11969, PCI_QUIRK_MSI_INTX_BUG, 0, 0 }, + { 0xE0B11969, PCI_QUIRK_MSI_INTX_BUG, 0, 0 }, { 0x10901969, PCI_QUIRK_MSI_INTX_BUG, 0, 0 }, /* @@ -3099,7 +3100,7 @@ pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl, * If base is 0, then we have problems if this architecture does * not allow that. It is best to ignore such entries for the * moment. These will be allocated later if the driver specifically - * requests them. However, some removable busses look better when + * requests them. However, some removable buses look better when * all resources are allocated, so allow '0' to be overriden. * * Similarly treat maps whose values is the same as the test value @@ -4188,7 +4189,7 @@ pci_attach(device_t dev) /* * Since there can be multiple independently numbered PCI - * busses on systems with multiple PCI domains, we can't use + * buses on systems with multiple PCI domains, we can't use * the unit number to decide which bus we are probing. We ask * the parent pcib what our domain and bus numbers are. 
*/ diff --git a/freebsd/sys/dev/pci/pci_pci.c b/freebsd/sys/dev/pci/pci_pci.c index 7d763dd9..d468ca2e 100644 --- a/freebsd/sys/dev/pci/pci_pci.c +++ b/freebsd/sys/dev/pci/pci_pci.c @@ -78,6 +78,8 @@ static void pcib_pcie_ab_timeout(void *arg); static void pcib_pcie_cc_timeout(void *arg); static void pcib_pcie_dll_timeout(void *arg); #endif +static int pcib_request_feature(device_t pcib, device_t dev, + enum pci_feature feature); static device_method_t pcib_methods[] = { /* Device interface */ @@ -121,6 +123,7 @@ static device_method_t pcib_methods[] = { DEVMETHOD(pcib_try_enable_ari, pcib_try_enable_ari), DEVMETHOD(pcib_ari_enabled, pcib_ari_enabled), DEVMETHOD(pcib_decode_rid, pcib_ari_decode_rid), + DEVMETHOD(pcib_request_feature, pcib_request_feature), DEVMETHOD_END }; @@ -920,6 +923,7 @@ static void pcib_probe_hotplug(struct pcib_softc *sc) { device_t dev; + uint32_t link_cap; uint16_t link_sta, slot_sta; if (!pci_enable_pcie_hp) @@ -932,11 +936,13 @@ pcib_probe_hotplug(struct pcib_softc *sc) if (!(pcie_read_config(dev, PCIER_FLAGS, 2) & PCIEM_FLAGS_SLOT)) return; - sc->pcie_link_cap = pcie_read_config(dev, PCIER_LINK_CAP, 4); sc->pcie_slot_cap = pcie_read_config(dev, PCIER_SLOT_CAP, 4); if ((sc->pcie_slot_cap & PCIEM_SLOT_CAP_HPC) == 0) return; + link_cap = pcie_read_config(dev, PCIER_LINK_CAP, 4); + if ((link_cap & PCIEM_LINK_CAP_DL_ACTIVE) == 0) + return; /* * Some devices report that they have an MRL when they actually @@ -947,8 +953,7 @@ pcib_probe_hotplug(struct pcib_softc *sc) * If there is an open MRL but the Data Link Layer is active, * the MRL is not real. 
*/ - if ((sc->pcie_slot_cap & PCIEM_SLOT_CAP_MRLSP) != 0 && - (sc->pcie_link_cap & PCIEM_LINK_CAP_DL_ACTIVE) != 0) { + if ((sc->pcie_slot_cap & PCIEM_SLOT_CAP_MRLSP) != 0) { link_sta = pcie_read_config(dev, PCIER_LINK_STA, 2); slot_sta = pcie_read_config(dev, PCIER_SLOT_STA, 2); if ((slot_sta & PCIEM_SLOT_STA_MRLSS) != 0 && @@ -957,6 +962,17 @@ pcib_probe_hotplug(struct pcib_softc *sc) } } + /* + * Now that we're sure we want to do hot plug, ask the + * firmware, if any, if that's OK. + */ + if (pcib_request_feature(device_get_parent(device_get_parent(dev)), dev, + PCI_FEATURE_HP) != 0) { + if (bootverbose) + device_printf(dev, "Unable to activate hot plug feature.\n"); + return; + } + sc->flags |= PCIB_HOTPLUG; } @@ -1061,10 +1077,8 @@ pcib_hotplug_present(struct pcib_softc *sc) return (0); /* Require the Data Link Layer to be active. */ - if (sc->pcie_link_cap & PCIEM_LINK_CAP_DL_ACTIVE) { - if (!(sc->pcie_link_sta & PCIEM_LINK_STA_DL_ACTIVE)) - return (0); - } + if (!(sc->pcie_link_sta & PCIEM_LINK_STA_DL_ACTIVE)) + return (0); return (-1); } @@ -1121,20 +1135,18 @@ pcib_pcie_hotplug_update(struct pcib_softc *sc, uint16_t val, uint16_t mask, * changed on this interrupt. Stop any scheduled timer if * the Data Link Layer is active. 
*/ - if (sc->pcie_link_cap & PCIEM_LINK_CAP_DL_ACTIVE) { - if (card_inserted && - !(sc->pcie_link_sta & PCIEM_LINK_STA_DL_ACTIVE) && - sc->pcie_slot_sta & - (PCIEM_SLOT_STA_MRLSC | PCIEM_SLOT_STA_PDC)) { - if (cold) - device_printf(sc->dev, - "Data Link Layer inactive\n"); - else - callout_reset(&sc->pcie_dll_timer, hz, - pcib_pcie_dll_timeout, sc); - } else if (sc->pcie_link_sta & PCIEM_LINK_STA_DL_ACTIVE) - callout_stop(&sc->pcie_dll_timer); - } + if (card_inserted && + !(sc->pcie_link_sta & PCIEM_LINK_STA_DL_ACTIVE) && + sc->pcie_slot_sta & + (PCIEM_SLOT_STA_MRLSC | PCIEM_SLOT_STA_PDC)) { + if (cold) + device_printf(sc->dev, + "Data Link Layer inactive\n"); + else + callout_reset(&sc->pcie_dll_timer, hz, + pcib_pcie_dll_timeout, sc); + } else if (sc->pcie_link_sta & PCIEM_LINK_STA_DL_ACTIVE) + callout_stop(&sc->pcie_dll_timer); pcib_pcie_hotplug_command(sc, val, mask); @@ -1149,7 +1161,7 @@ pcib_pcie_hotplug_update(struct pcib_softc *sc, uint16_t val, uint16_t mask, } static void -pcib_pcie_intr(void *arg) +pcib_pcie_intr_hotplug(void *arg) { struct pcib_softc *sc; device_t dev; @@ -1262,7 +1274,7 @@ pcib_pcie_cc_timeout(void *arg) } else { device_printf(dev, "Missed HotPlug interrupt waiting for Command Completion\n"); - pcib_pcie_intr(sc); + pcib_pcie_intr_hotplug(sc); } } @@ -1285,7 +1297,7 @@ pcib_pcie_dll_timeout(void *arg) } else if (sta != sc->pcie_link_sta) { device_printf(dev, "Missed HotPlug interrupt waiting for DLL Active\n"); - pcib_pcie_intr(sc); + pcib_pcie_intr_hotplug(sc); } } @@ -1331,7 +1343,7 @@ pcib_alloc_pcie_irq(struct pcib_softc *sc) } error = bus_setup_intr(dev, sc->pcie_irq, INTR_TYPE_MISC, - NULL, pcib_pcie_intr, sc, &sc->pcie_ihand); + NULL, pcib_pcie_intr_hotplug, sc, &sc->pcie_ihand); if (error) { device_printf(dev, "Failed to setup PCI-e interrupt handler\n"); bus_release_resource(dev, SYS_RES_IRQ, rid, sc->pcie_irq); @@ -1384,7 +1396,7 @@ pcib_setup_hotplug(struct pcib_softc *sc) mask = PCIEM_SLOT_CTL_DLLSCE | PCIEM_SLOT_CTL_HPIE 
| PCIEM_SLOT_CTL_CCIE | PCIEM_SLOT_CTL_PDCE | PCIEM_SLOT_CTL_MRLSCE | PCIEM_SLOT_CTL_PFDE | PCIEM_SLOT_CTL_ABPE; - val = PCIEM_SLOT_CTL_PDCE | PCIEM_SLOT_CTL_HPIE; + val = PCIEM_SLOT_CTL_DLLSCE | PCIEM_SLOT_CTL_HPIE | PCIEM_SLOT_CTL_PDCE; if (sc->pcie_slot_cap & PCIEM_SLOT_CAP_APB) val |= PCIEM_SLOT_CTL_ABPE; if (sc->pcie_slot_cap & PCIEM_SLOT_CAP_PCP) @@ -1393,8 +1405,6 @@ pcib_setup_hotplug(struct pcib_softc *sc) val |= PCIEM_SLOT_CTL_MRLSCE; if (!(sc->pcie_slot_cap & PCIEM_SLOT_CAP_NCCS)) val |= PCIEM_SLOT_CTL_CCIE; - if (sc->pcie_link_cap & PCIEM_LINK_CAP_DL_ACTIVE) - val |= PCIEM_SLOT_CTL_DLLSCE; /* Turn the attention indicator off. */ if (sc->pcie_slot_cap & PCIEM_SLOT_CAP_AIP) { @@ -2835,3 +2845,43 @@ pcib_try_enable_ari(device_t pcib, device_t dev) return (0); } + +int +pcib_request_feature_allow(device_t pcib, device_t dev, + enum pci_feature feature) +{ + /* + * No host firmware we have to negotiate with, so we allow + * every valid feature requested. + */ + switch (feature) { + case PCI_FEATURE_AER: + case PCI_FEATURE_HP: + break; + default: + return (EINVAL); + } + + return (0); +} + +/* + * Pass the request to use this PCI feature up the tree. Either there's a + * firmware like ACPI that's using this feature that will approve (or deny) the + * request to take it over, or the platform has no such firmware, in which case + * the request will be approved. If the request is approved, the OS is expected + * to make use of the feature or render it harmless. + */ +static int +pcib_request_feature(device_t pcib, device_t dev, enum pci_feature feature) +{ + device_t bus; + + /* + * Our parent is necessarily a pci bus. Its parent will either be + * another pci bridge (which passes it up) or a host bridge that can + * approve or reject the request. 
+ */ + bus = device_get_parent(pcib); + return (PCIB_REQUEST_FEATURE(device_get_parent(bus), dev, feature)); +} diff --git a/freebsd/sys/dev/pci/pci_private.h b/freebsd/sys/dev/pci/pci_private.h index b0f14818..6c5a1677 100644 --- a/freebsd/sys/dev/pci/pci_private.h +++ b/freebsd/sys/dev/pci/pci_private.h @@ -34,7 +34,7 @@ /* * Export definitions of the pci bus so that we can more easily share - * it with "subclass" busses. + * it with "subclass" buses. */ DECLARE_CLASS(pci_driver); diff --git a/freebsd/sys/dev/pci/pcib_private.h b/freebsd/sys/dev/pci/pcib_private.h index 65aec8d4..1004e133 100644 --- a/freebsd/sys/dev/pci/pcib_private.h +++ b/freebsd/sys/dev/pci/pcib_private.h @@ -132,7 +132,6 @@ struct pcib_softc uint16_t bridgectl; /* bridge control register */ uint16_t pcie_link_sta; uint16_t pcie_slot_sta; - uint32_t pcie_link_cap; uint32_t pcie_slot_cap; struct resource *pcie_irq; void *pcie_ihand; @@ -194,5 +193,6 @@ int pcib_get_id(device_t pcib, device_t dev, enum pci_id_type type, uintptr_t *id); void pcib_decode_rid(device_t pcib, uint16_t rid, int *bus, int *slot, int *func); +int pcib_request_feature_allow(device_t pcib, device_t dev, enum pci_feature feature); #endif diff --git a/freebsd/sys/dev/pci/pcireg.h b/freebsd/sys/dev/pci/pcireg.h index 291bb2ea..b434b2e6 100644 --- a/freebsd/sys/dev/pci/pcireg.h +++ b/freebsd/sys/dev/pci/pcireg.h @@ -478,6 +478,11 @@ #define PCIS_DASP_MGMT_CARD 0x20 #define PCIS_DASP_OTHER 0x80 +#define PCIC_ACCEL 0x12 +#define PCIS_ACCEL_PROCESSING 0x00 + +#define PCIC_INSTRUMENT 0x13 + #define PCIC_OTHER 0xff /* Bridge Control Values. 
*/ @@ -1040,3 +1045,19 @@ #define PCIR_SRIOV_BARS 0x24 #define PCIR_SRIOV_BAR(x) (PCIR_SRIOV_BARS + (x) * 4) +/* + * PCI Express Firmware Interface definitions + */ +#define PCI_OSC_STATUS 0 +#define PCI_OSC_SUPPORT 1 +#define PCIM_OSC_SUPPORT_EXT_PCI_CONF 0x01 /* Extended PCI Config Space */ +#define PCIM_OSC_SUPPORT_ASPM 0x02 /* Active State Power Management */ +#define PCIM_OSC_SUPPORT_CPMC 0x04 /* Clock Power Management Cap */ +#define PCIM_OSC_SUPPORT_SEG_GROUP 0x08 /* PCI Segment Groups supported */ +#define PCIM_OSC_SUPPORT_MSI 0x10 /* MSI signalling supported */ +#define PCI_OSC_CTL 2 +#define PCIM_OSC_CTL_PCIE_HP 0x01 /* PCIe Native Hot Plug */ +#define PCIM_OSC_CTL_SHPC_HP 0x02 /* SHPC Native Hot Plug */ +#define PCIM_OSC_CTL_PCIE_PME 0x04 /* PCIe Native Power Mgt Events */ +#define PCIM_OSC_CTL_PCIE_AER 0x08 /* PCIe Advanced Error Reporting */ +#define PCIM_OSC_CTL_PCIE_CAP_STRUCT 0x10 /* Various Capability Structures */ diff --git a/freebsd/sys/dev/rtwn/if_rtwn.c b/freebsd/sys/dev/rtwn/if_rtwn.c index a553814f..050d9960 100644 --- a/freebsd/sys/dev/rtwn/if_rtwn.c +++ b/freebsd/sys/dev/rtwn/if_rtwn.c @@ -123,9 +123,6 @@ static int rtwn_run(struct rtwn_softc *, static void rtwn_watchdog(void *); #endif static void rtwn_parent(struct ieee80211com *); -static int rtwn_llt_write(struct rtwn_softc *, uint32_t, - uint32_t); -static int rtwn_llt_init(struct rtwn_softc *); static int rtwn_dma_init(struct rtwn_softc *); static int rtwn_mac_init(struct rtwn_softc *); static void rtwn_mrr_init(struct rtwn_softc *); @@ -697,6 +694,7 @@ rtwn_ioctl_reset(struct ieee80211vap *vap, u_long cmd) case IEEE80211_IOC_RTSTHRESHOLD: case IEEE80211_IOC_PROTMODE: case IEEE80211_IOC_HTPROTMODE: + case IEEE80211_IOC_LDPC: error = 0; break; default: @@ -1384,54 +1382,6 @@ rtwn_parent(struct ieee80211com *ic) rtwn_stop(sc); } - -static int -rtwn_llt_write(struct rtwn_softc *sc, uint32_t addr, uint32_t data) -{ - int ntries, error; - - error = rtwn_write_4(sc, R92C_LLT_INIT, - 
SM(R92C_LLT_INIT_OP, R92C_LLT_INIT_OP_WRITE) | - SM(R92C_LLT_INIT_ADDR, addr) | - SM(R92C_LLT_INIT_DATA, data)); - if (error != 0) - return (error); - /* Wait for write operation to complete. */ - for (ntries = 0; ntries < 20; ntries++) { - if (MS(rtwn_read_4(sc, R92C_LLT_INIT), R92C_LLT_INIT_OP) == - R92C_LLT_INIT_OP_NO_ACTIVE) - return (0); - rtwn_delay(sc, 10); - } - return (ETIMEDOUT); -} - -static int -rtwn_llt_init(struct rtwn_softc *sc) -{ - int i, error; - - /* Reserve pages [0; page_count]. */ - for (i = 0; i < sc->page_count; i++) { - if ((error = rtwn_llt_write(sc, i, i + 1)) != 0) - return (error); - } - /* NB: 0xff indicates end-of-list. */ - if ((error = rtwn_llt_write(sc, i, 0xff)) != 0) - return (error); - /* - * Use pages [page_count + 1; pktbuf_count - 1] - * as ring buffer. - */ - for (++i; i < sc->pktbuf_count - 1; i++) { - if ((error = rtwn_llt_write(sc, i, i + 1)) != 0) - return (error); - } - /* Make the last page point to the beginning of the ring buffer. */ - error = rtwn_llt_write(sc, i, sc->page_count + 1); - return (error); -} - static int rtwn_dma_init(struct rtwn_softc *sc) { @@ -1770,13 +1720,13 @@ rtwn_node_alloc(struct ieee80211vap *vap, } static void -rtwn_newassoc(struct ieee80211_node *ni, int isnew) +rtwn_newassoc(struct ieee80211_node *ni, int isnew __unused) { struct rtwn_softc *sc = ni->ni_ic->ic_softc; struct rtwn_node *un = RTWN_NODE(ni); int id; - if (!isnew) + if (un->id != RTWN_MACID_UNDEFINED) return; RTWN_NT_LOCK(sc); @@ -2001,6 +1951,7 @@ rtwn_stop(struct rtwn_softc *sc) sc->fwver = 0; sc->thcal_temp = 0; sc->cur_bcnq_id = RTWN_VAP_ID_INVALID; + bzero(&sc->last_physt, sizeof(sc->last_physt)); #ifdef D4054 ieee80211_tx_watchdog_stop(&sc->sc_ic); diff --git a/freebsd/sys/dev/rtwn/if_rtwn_rx.c b/freebsd/sys/dev/rtwn/if_rtwn_rx.c index 8d103dc7..31ab7e69 100644 --- a/freebsd/sys/dev/rtwn/if_rtwn_rx.c +++ b/freebsd/sys/dev/rtwn/if_rtwn_rx.c @@ -119,18 +119,19 @@ rtwn_set_basicrates(struct rtwn_softc *sc, uint32_t rates) } 
static void -rtwn_update_avgrssi(struct rtwn_softc *sc, struct rtwn_node *un, int rate) +rtwn_update_avgrssi(struct rtwn_softc *sc, struct rtwn_node *un, int8_t rssi, + int is_cck) { int pwdb; /* Convert antenna signal to percentage. */ - if (un->last_rssi <= -100 || un->last_rssi >= 20) + if (rssi <= -100 || rssi >= 20) pwdb = 0; - else if (un->last_rssi >= 0) + else if (rssi >= 0) pwdb = 100; else - pwdb = 100 + un->last_rssi; - if (RTWN_RATE_IS_CCK(rate)) { + pwdb = 100 + rssi; + if (is_cck) { /* CCK gain is smaller than OFDM/MCS gain. */ pwdb += 6; if (pwdb > 100) @@ -157,11 +158,11 @@ rtwn_update_avgrssi(struct rtwn_softc *sc, struct rtwn_node *un, int rate) } static int8_t -rtwn_get_rssi(struct rtwn_softc *sc, int rate, void *physt) +rtwn_get_rssi(struct rtwn_softc *sc, void *physt, int is_cck) { int8_t rssi; - if (RTWN_RATE_IS_CCK(rate)) + if (is_cck) rssi = rtwn_get_rssi_cck(sc, physt); else /* OFDM/HT. */ rssi = rtwn_get_rssi_ofdm(sc, physt); @@ -190,81 +191,133 @@ rtwn_get_tsf(struct rtwn_softc *sc, uint64_t *buf, int id) *buf += rtwn_get_tsf_low(sc, id); } +static uint64_t +rtwn_extend_rx_tsf(struct rtwn_softc *sc, const struct r92c_rx_stat *stat) +{ + uint64_t tsft; + uint32_t rxdw3, tsfl, tsfl_curr; + int id; + + rxdw3 = le32toh(stat->rxdw3); + tsfl = le32toh(stat->tsf_low); + id = MS(rxdw3, R92C_RXDW3_BSSID_FIT); + + switch (id) { + case 1: + case 2: + id >>= 1; + tsfl_curr = rtwn_get_tsf_low(sc, id); + break; + default: + { + uint32_t tsfl0, tsfl1; + + tsfl0 = rtwn_get_tsf_low(sc, 0); + tsfl1 = rtwn_get_tsf_low(sc, 1); + + if (abs(tsfl0 - tsfl) < abs(tsfl1 - tsfl)) { + id = 0; + tsfl_curr = tsfl0; + } else { + id = 1; + tsfl_curr = tsfl1; + } + break; + } + } + + tsft = rtwn_get_tsf_high(sc, id); + if (tsfl > tsfl_curr && tsfl > 0xffff0000) + tsft--; + tsft <<= 32; + tsft += tsfl; + + return (tsft); +} + struct ieee80211_node * -rtwn_rx_common(struct rtwn_softc *sc, struct mbuf *m, void *desc, - int8_t *rssi) +rtwn_rx_common(struct rtwn_softc *sc, 
struct mbuf *m, void *desc) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211_node *ni; struct ieee80211_frame_min *wh; + struct ieee80211_rx_stats rxs; struct rtwn_node *un; struct r92c_rx_stat *stat; - uint32_t rxdw0, rxdw3; - int cipher, infosz, pktlen, rate, shift; + void *physt; + uint32_t rxdw0; + int8_t rssi; + int cipher, infosz, is_cck, pktlen, shift; stat = desc; rxdw0 = le32toh(stat->rxdw0); - rxdw3 = le32toh(stat->rxdw3); cipher = MS(rxdw0, R92C_RXDW0_CIPHER); infosz = MS(rxdw0, R92C_RXDW0_INFOSZ) * 8; pktlen = MS(rxdw0, R92C_RXDW0_PKTLEN); shift = MS(rxdw0, R92C_RXDW0_SHIFT); - rate = MS(rxdw3, R92C_RXDW3_RATE); wh = (struct ieee80211_frame_min *)(mtodo(m, shift + infosz)); if ((wh->i_fc[1] & IEEE80211_FC1_PROTECTED) && cipher != R92C_CAM_ALGO_NONE) m->m_flags |= M_WEP; - if (pktlen >= sizeof(*wh)) + if (pktlen >= sizeof(*wh)) { ni = ieee80211_find_rxnode(ic, wh); - else + if (ni != NULL && (ni->ni_flags & IEEE80211_NODE_HT)) + m->m_flags |= M_AMPDU; + } else ni = NULL; un = RTWN_NODE(ni); - /* Get RSSI from PHY status descriptor if present. */ - if (infosz != 0 && (rxdw0 & R92C_RXDW0_PHYST)) { - *rssi = rtwn_get_rssi(sc, rate, mtod(m, void *)); - RTWN_DPRINTF(sc, RTWN_DEBUG_RSSI, "%s: rssi %d, ridx %d\n", - __func__, *rssi, rate); + if (infosz != 0 && (rxdw0 & R92C_RXDW0_PHYST)) + physt = (void *)mtodo(m, shift); + else + physt = (un != NULL) ? &un->last_physt : &sc->last_physt; + + bzero(&rxs, sizeof(rxs)); + rtwn_get_rx_stats(sc, &rxs, desc, physt); + if (rxs.c_pktflags & IEEE80211_RX_F_AMPDU) { + /* Next MPDU will come without PHY info. */ + memcpy(&sc->last_physt, physt, sizeof(sc->last_physt)); + if (un != NULL) + memcpy(&un->last_physt, physt, sizeof(sc->last_physt)); + } - sc->last_rssi = *rssi; - if (un != NULL) { - un->last_rssi = *rssi; + /* Add some common bits. */ + /* NB: should not happen. 
*/ + if (rxdw0 & R92C_RXDW0_CRCERR) + rxs.c_pktflags |= IEEE80211_RX_F_FAIL_FCSCRC; + + rxs.r_flags |= IEEE80211_R_TSF_START; /* XXX undocumented */ + rxs.r_flags |= IEEE80211_R_TSF64; + rxs.c_rx_tsf = rtwn_extend_rx_tsf(sc, stat); + + /* Get RSSI from PHY status descriptor. */ + is_cck = (rxs.c_pktflags & IEEE80211_RX_F_CCK) != 0; + rssi = rtwn_get_rssi(sc, physt, is_cck); + + /* XXX TODO: we really need a rate-to-string method */ + RTWN_DPRINTF(sc, RTWN_DEBUG_RSSI, "%s: rssi %d, rate %d\n", + __func__, rssi, rxs.c_rate); + if (un != NULL && infosz != 0 && (rxdw0 & R92C_RXDW0_PHYST)) { + /* Update our average RSSI. */ + rtwn_update_avgrssi(sc, un, rssi, is_cck); + } - /* Update our average RSSI. */ - rtwn_update_avgrssi(sc, un, rate); - } - } else - *rssi = (un != NULL) ? un->last_rssi : sc->last_rssi; + rxs.r_flags |= IEEE80211_R_NF | IEEE80211_R_RSSI; + rxs.c_nf = RTWN_NOISE_FLOOR; + rxs.c_rssi = rssi - rxs.c_nf; + (void) ieee80211_add_rx_params(m, &rxs); if (ieee80211_radiotap_active(ic)) { struct rtwn_rx_radiotap_header *tap = &sc->sc_rxtap; - int id = RTWN_VAP_ID_INVALID; - - if (ni != NULL) - id = RTWN_VAP(ni->ni_vap)->id; - if (id == RTWN_VAP_ID_INVALID) - id = 0; tap->wr_flags = rtwn_rx_radiotap_flags(sc, desc); - tap->wr_tsft = rtwn_get_tsf_high(sc, id); - if (le32toh(stat->tsf_low) > rtwn_get_tsf_low(sc, id)) - tap->wr_tsft--; - tap->wr_tsft = (uint64_t)htole32(tap->wr_tsft) << 32; - tap->wr_tsft += stat->tsf_low; - - /* XXX 20/40? */ - - /* Map HW rate index to 802.11 rate. */ - if (rate < RTWN_RIDX_MCS(0)) - tap->wr_rate = ridx2rate[rate]; - else /* MCS0~15. */ - tap->wr_rate = IEEE80211_RATE_MCS | (rate - 12); - - tap->wr_dbm_antsignal = *rssi; - tap->wr_dbm_antnoise = RTWN_NOISE_FLOOR; + tap->wr_tsft = htole64(rxs.c_rx_tsf); + tap->wr_rate = rxs.c_rate; + tap->wr_dbm_antsignal = rssi; + tap->wr_dbm_antnoise = rxs.c_nf; } /* Drop PHY descriptor. 
*/ diff --git a/freebsd/sys/dev/rtwn/if_rtwn_rx.h b/freebsd/sys/dev/rtwn/if_rtwn_rx.h index dfdcc4bf..49897eb9 100644 --- a/freebsd/sys/dev/rtwn/if_rtwn_rx.h +++ b/freebsd/sys/dev/rtwn/if_rtwn_rx.h @@ -26,7 +26,7 @@ void rtwn_get_rates(struct rtwn_softc *, const struct ieee80211_rateset *, const struct ieee80211_htrateset *, uint32_t *, int *, int); void rtwn_set_basicrates(struct rtwn_softc *, uint32_t); struct ieee80211_node * rtwn_rx_common(struct rtwn_softc *, struct mbuf *, - void *, int8_t *); + void *); void rtwn_adhoc_recv_mgmt(struct ieee80211_node *, struct mbuf *, int, const struct ieee80211_rx_stats *, int, int); void rtwn_set_multi(struct rtwn_softc *); diff --git a/freebsd/sys/dev/rtwn/if_rtwn_tx.c b/freebsd/sys/dev/rtwn/if_rtwn_tx.c index 1ea9a766..c48e2e0e 100644 --- a/freebsd/sys/dev/rtwn/if_rtwn_tx.c +++ b/freebsd/sys/dev/rtwn/if_rtwn_tx.c @@ -114,17 +114,16 @@ static int rtwn_tx_data(struct rtwn_softc *sc, struct ieee80211_node *ni, struct mbuf *m) { - const struct ieee80211_txparam *tp; + const struct ieee80211_txparam *tp = ni->ni_txparms; struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = ni->ni_vap; struct ieee80211_key *k = NULL; - struct ieee80211_channel *chan; struct ieee80211_frame *wh; struct rtwn_tx_desc_common *txd; struct rtwn_tx_buf buf; uint8_t rate, ridx, type; u_int cipher; - int ismcast, maxretry; + int ismcast; RTWN_ASSERT_LOCKED(sc); @@ -132,20 +131,15 @@ rtwn_tx_data(struct rtwn_softc *sc, struct ieee80211_node *ni, type = wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK; ismcast = IEEE80211_IS_MULTICAST(wh->i_addr1); - chan = (ni->ni_chan != IEEE80211_CHAN_ANYC) ? - ni->ni_chan : ic->ic_curchan; - tp = &vap->iv_txparms[ieee80211_chan2mode(chan)]; - maxretry = tp->maxretry; - /* Choose a TX rate index. 
*/ - if (type == IEEE80211_FC0_TYPE_MGT) + if (type == IEEE80211_FC0_TYPE_MGT || + type == IEEE80211_FC0_TYPE_CTL || + (m->m_flags & M_EAPOL) != 0) rate = tp->mgmtrate; else if (ismcast) rate = tp->mcastrate; else if (tp->ucastrate != IEEE80211_FIXED_RATE_NONE) rate = tp->ucastrate; - else if (m->m_flags & M_EAPOL) - rate = tp->mgmtrate; else { if (sc->sc_ratectl == RTWN_RATECTL_NET80211) { /* XXX pass pktlen */ @@ -183,7 +177,7 @@ rtwn_tx_data(struct rtwn_softc *sc, struct ieee80211_node *ni, memset(txd, 0, sc->txdesc_len); txd->txdw1 = htole32(SM(RTWN_TXDW1_CIPHER, rtwn_get_cipher(cipher))); - rtwn_fill_tx_desc(sc, ni, m, txd, ridx, maxretry); + rtwn_fill_tx_desc(sc, ni, m, txd, ridx, tp->maxretry); if (ieee80211_radiotap_active_vap(vap)) { struct rtwn_tx_radiotap_header *tap = &sc->sc_txtap; diff --git a/freebsd/sys/dev/rtwn/if_rtwnvar.h b/freebsd/sys/dev/rtwn/if_rtwnvar.h index 0c010adb..d8754024 100644 --- a/freebsd/sys/dev/rtwn/if_rtwnvar.h +++ b/freebsd/sys/dev/rtwn/if_rtwnvar.h @@ -76,6 +76,12 @@ struct rtwn_tx_buf { uint8_t txd[RTWN_TX_DESC_SIZE]; } __attribute__((aligned(4))); +#define RTWN_PHY_STATUS_SIZE 32 +struct rtwn_tx_phystat { + uint32_t phydw[RTWN_PHY_STATUS_SIZE / sizeof(uint32_t)]; +}; + + struct rtwn_softc; union sec_param { @@ -95,7 +101,8 @@ struct rtwn_cmdq { struct rtwn_node { struct ieee80211_node ni; /* must be the first */ int id; - int8_t last_rssi; + + struct rtwn_tx_phystat last_physt; int avg_pwdb; }; #define RTWN_NODE(ni) ((struct rtwn_node *)(ni)) @@ -195,7 +202,7 @@ struct rtwn_softc { const char *name; int sc_ant; - int8_t last_rssi; + struct rtwn_tx_phystat last_physt; uint8_t thcal_temp; int cur_bcnq_id; @@ -301,6 +308,7 @@ struct rtwn_softc { void (*sc_fw_reset)(struct rtwn_softc *, int); void (*sc_fw_download_enable)(struct rtwn_softc *, int); #endif + int (*sc_llt_init)(struct rtwn_softc *); int (*sc_set_page_size)(struct rtwn_softc *); void (*sc_lc_calib)(struct rtwn_softc *); void (*sc_iq_calib)(struct rtwn_softc *); @@ 
-336,6 +344,9 @@ struct rtwn_softc { struct ieee80211vap *, int); void (*sc_set_rssi)(struct rtwn_softc *); #endif + void (*sc_get_rx_stats)(struct rtwn_softc *, + struct ieee80211_rx_stats *, const void *, + const void *); int8_t (*sc_get_rssi_cck)(struct rtwn_softc *, void *); int8_t (*sc_get_rssi_ofdm)(struct rtwn_softc *, void *); int (*sc_classify_intr)(struct rtwn_softc *, void *, int); @@ -462,8 +473,8 @@ void rtwn_suspend(struct rtwn_softc *); /* Aliases. */ #define rtwn_bb_write rtwn_write_4 -#define rtwn_bb_read rtwn_read_4 -#define rtwn_bb_setbits rtwn_setbits_4 +#define rtwn_bb_read rtwn_read_4 +#define rtwn_bb_setbits rtwn_setbits_4 /* Device-specific. */ #define rtwn_rf_read(_sc, _chain, _addr) \ @@ -478,6 +489,8 @@ void rtwn_suspend(struct rtwn_softc *); (((_sc)->sc_parse_rom)((_sc), (_rom))) #define rtwn_set_led(_sc, _led, _on) \ (((_sc)->sc_set_led)((_sc), (_led), (_on))) +#define rtwn_get_rx_stats(_sc, _rxs, _desc, _physt) \ + (((_sc)->sc_get_rx_stats((_sc), (_rxs), (_desc), (_physt)))) #define rtwn_get_rssi_cck(_sc, _physt) \ (((_sc)->sc_get_rssi_cck)((_sc), (_physt))) #define rtwn_get_rssi_ofdm(_sc, _physt) \ @@ -492,6 +505,8 @@ void rtwn_suspend(struct rtwn_softc *); #define rtwn_fw_download_enable(_sc, _enable) \ (((_sc)->sc_fw_download_enable)((_sc), (_enable))) #endif +#define rtwn_llt_init(_sc) \ + (((_sc)->sc_llt_init)((_sc))) #define rtwn_set_page_size(_sc) \ (((_sc)->sc_set_page_size)((_sc))) #define rtwn_lc_calib(_sc) \ diff --git a/freebsd/sys/dev/rtwn/pci/rtwn_pci_attach.c b/freebsd/sys/dev/rtwn/pci/rtwn_pci_attach.c index 5b28d27f..9813cb32 100644 --- a/freebsd/sys/dev/rtwn/pci/rtwn_pci_attach.c +++ b/freebsd/sys/dev/rtwn/pci/rtwn_pci_attach.c @@ -96,20 +96,31 @@ static void rtwn_pci_beacon_update_end(struct rtwn_softc *, static void rtwn_pci_attach_methods(struct rtwn_softc *); -static int matched_chip = RTWN_CHIP_MAX_PCI; +static const struct rtwn_pci_ident * +rtwn_pci_probe_sub(device_t dev) +{ + const struct rtwn_pci_ident 
*ident; + int vendor_id, device_id; + + vendor_id = pci_get_vendor(dev); + device_id = pci_get_device(dev); + + for (ident = rtwn_pci_ident_table; ident->name != NULL; ident++) + if (vendor_id == ident->vendor && device_id == ident->device) + return (ident); + + return (NULL); +} static int rtwn_pci_probe(device_t dev) { const struct rtwn_pci_ident *ident; - for (ident = rtwn_pci_ident_table; ident->name != NULL; ident++) { - if (pci_get_vendor(dev) == ident->vendor && - pci_get_device(dev) == ident->device) { - matched_chip = ident->chip; - device_set_desc(dev, ident->name); - return (BUS_PROBE_DEFAULT); - } + ident = rtwn_pci_probe_sub(dev); + if (ident != NULL) { + device_set_desc(dev, ident->name); + return (BUS_PROBE_DEFAULT); } return (ENXIO); } @@ -593,13 +604,15 @@ rtwn_pci_attach_methods(struct rtwn_softc *sc) static int rtwn_pci_attach(device_t dev) { + const struct rtwn_pci_ident *ident; struct rtwn_pci_softc *pc = device_get_softc(dev); struct rtwn_softc *sc = &pc->pc_sc; struct ieee80211com *ic = &sc->sc_ic; uint32_t lcsr; int cap_off, i, error, rid; - if (matched_chip >= RTWN_CHIP_MAX_PCI) + ident = rtwn_pci_probe_sub(dev); + if (ident == NULL) return (ENXIO); /* @@ -651,8 +664,7 @@ rtwn_pci_attach(device_t dev) mtx_init(&sc->sc_mtx, ic->ic_name, MTX_NETWORK_LOCK, MTX_DEF); rtwn_pci_attach_methods(sc); - /* XXX something similar to USB_GET_DRIVER_INFO() */ - rtwn_pci_attach_private(pc, matched_chip); + rtwn_pci_attach_private(pc, ident->chip); /* Allocate Tx/Rx buffers. */ error = rtwn_pci_alloc_rx_list(sc); diff --git a/freebsd/sys/dev/rtwn/pci/rtwn_pci_rx.c b/freebsd/sys/dev/rtwn/pci/rtwn_pci_rx.c index 8da0061b..292fb07f 100644 --- a/freebsd/sys/dev/rtwn/pci/rtwn_pci_rx.c +++ b/freebsd/sys/dev/rtwn/pci/rtwn_pci_rx.c @@ -97,7 +97,6 @@ rtwn_pci_rx_frame(struct rtwn_softc *sc, struct r92ce_rx_stat *rx_desc, struct ieee80211_node *ni; uint32_t rxdw0; struct mbuf *m, *m1; - int8_t rssi = 0, nf; int infosz, pktlen, shift, error; /* Dump Rx descriptor. 
*/ @@ -164,12 +163,11 @@ rtwn_pci_rx_frame(struct rtwn_softc *sc, struct r92ce_rx_stat *rx_desc, rx_data->m = m1; m->m_pkthdr.len = m->m_len = pktlen + infosz + shift; - nf = RTWN_NOISE_FLOOR; - ni = rtwn_rx_common(sc, m, rx_desc, &rssi); + ni = rtwn_rx_common(sc, m, rx_desc); RTWN_DPRINTF(sc, RTWN_DEBUG_RECV, - "%s: Rx frame len %d, infosz %d, shift %d, rssi %d\n", - __func__, pktlen, infosz, shift, rssi); + "%s: Rx frame len %d, infosz %d, shift %d\n", + __func__, pktlen, infosz, shift); /* Update RX descriptor. */ rtwn_pci_setup_rx_desc(pc, rx_desc, rx_data->paddr, MJUMPAGESIZE, @@ -178,11 +176,11 @@ rtwn_pci_rx_frame(struct rtwn_softc *sc, struct r92ce_rx_stat *rx_desc, /* Send the frame to the 802.11 layer. */ RTWN_UNLOCK(sc); if (ni != NULL) { - (void)ieee80211_input(ni, m, rssi - nf, nf); + (void)ieee80211_input_mimo(ni, m); /* Node is no longer needed. */ ieee80211_free_node(ni); } else - (void)ieee80211_input_all(ic, m, rssi - nf, nf); + (void)ieee80211_input_mimo_all(ic, m); RTWN_LOCK(sc); @@ -284,17 +282,6 @@ rtwn_pci_rx_done(struct rtwn_softc *sc) ring->cur = (ring->cur + 1) % RTWN_PCI_RX_LIST_COUNT; } - - /* Finished receive; age anything left on the FF queue by a little bump */ - /* - * XXX TODO: just make this a callout timer schedule so we can - * flush the FF staging queue if we're approaching idle. 
- */ -#ifdef IEEE80211_SUPPORT_SUPERG - if (!(sc->sc_flags & RTWN_FW_LOADED) || - sc->sc_ratectl != RTWN_RATECTL_NET80211) - rtwn_cmd_sleepable(sc, NULL, 0, rtwn_ff_flush_all); -#endif } void diff --git a/freebsd/sys/dev/rtwn/rtl8188e/r88e.h b/freebsd/sys/dev/rtwn/rtl8188e/r88e.h index 999ab400..1c03ddd3 100644 --- a/freebsd/sys/dev/rtwn/rtl8188e/r88e.h +++ b/freebsd/sys/dev/rtwn/rtl8188e/r88e.h @@ -85,6 +85,8 @@ void r88e_ratectl_tx_complete(struct rtwn_softc *, uint8_t *, int); void r88e_handle_c2h_report(struct rtwn_softc *, uint8_t *, int); int8_t r88e_get_rssi_cck(struct rtwn_softc *, void *); int8_t r88e_get_rssi_ofdm(struct rtwn_softc *, void *); +void r88e_get_rx_stats(struct rtwn_softc *, struct ieee80211_rx_stats *, + const void *, const void *); /* r88e_tx.c */ void r88e_tx_enable_ampdu(void *, int); diff --git a/freebsd/sys/dev/rtwn/rtl8188e/r88e_fw.c b/freebsd/sys/dev/rtwn/rtl8188e/r88e_fw.c index 409084f6..fb7743ed 100644 --- a/freebsd/sys/dev/rtwn/rtl8188e/r88e_fw.c +++ b/freebsd/sys/dev/rtwn/rtl8188e/r88e_fw.c @@ -71,7 +71,7 @@ r88e_fw_cmd(struct rtwn_softc *sc, uint8_t id, const void *buf, int len) } /* Wait for current FW box to be empty. 
*/ - for (ntries = 0; ntries < 50; ntries++) { + for (ntries = 0; ntries < 100; ntries++) { if (!(rtwn_read_1(sc, R92C_HMETFR) & (1 << sc->fwcur))) break; rtwn_delay(sc, 2000); diff --git a/freebsd/sys/dev/rtwn/rtl8188e/r88e_rx.c b/freebsd/sys/dev/rtwn/rtl8188e/r88e_rx.c index 464542b4..acffb40e 100644 --- a/freebsd/sys/dev/rtwn/rtl8188e/r88e_rx.c +++ b/freebsd/sys/dev/rtwn/rtl8188e/r88e_rx.c @@ -53,6 +53,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include @@ -211,3 +212,19 @@ r88e_get_rssi_ofdm(struct rtwn_softc *sc, void *physt) return (rssi); } + +void +r88e_get_rx_stats(struct rtwn_softc *sc, struct ieee80211_rx_stats *rxs, + const void *desc, const void *physt_ptr) +{ + const struct r88e_rx_phystat *physt = physt_ptr; + + r92c_get_rx_stats(sc, rxs, desc, physt_ptr); + + if (!sc->sc_ht40) { /* XXX center channel */ + rxs->r_flags |= IEEE80211_R_IEEE | IEEE80211_R_FREQ; + rxs->c_ieee = le16toh(physt->chan); + rxs->c_freq = ieee80211_ieee2mhz(rxs->c_ieee, + IEEE80211_CHAN_2GHZ); + } +} diff --git a/freebsd/sys/dev/rtwn/rtl8188e/usb/r88eu_attach.c b/freebsd/sys/dev/rtwn/rtl8188e/usb/r88eu_attach.c index 4d5452be..f834fb38 100644 --- a/freebsd/sys/dev/rtwn/rtl8188e/usb/r88eu_attach.c +++ b/freebsd/sys/dev/rtwn/rtl8188e/usb/r88eu_attach.c @@ -129,6 +129,7 @@ r88eu_attach(struct rtwn_usb_softc *uc) sc->sc_dump_tx_desc = r92cu_dump_tx_desc; sc->sc_tx_radiotap_flags = r92c_tx_radiotap_flags; sc->sc_rx_radiotap_flags = r92c_rx_radiotap_flags; + sc->sc_get_rx_stats = r88e_get_rx_stats; sc->sc_get_rssi_cck = r88e_get_rssi_cck; sc->sc_get_rssi_ofdm = r88e_get_rssi_ofdm; sc->sc_classify_intr = r88eu_classify_intr; @@ -147,6 +148,7 @@ r88eu_attach(struct rtwn_usb_softc *uc) sc->sc_fw_reset = r88e_fw_reset; sc->sc_fw_download_enable = r88e_fw_download_enable; #endif + sc->sc_llt_init = r92c_llt_init; sc->sc_set_page_size = r92c_set_page_size; sc->sc_lc_calib = r92c_lc_calib; sc->sc_iq_calib = r88e_iq_calib; /* XXX TODO */ diff --git 
a/freebsd/sys/dev/rtwn/rtl8192c/pci/r92ce_attach.c b/freebsd/sys/dev/rtwn/rtl8192c/pci/r92ce_attach.c index d53dbf98..225c69f5 100644 --- a/freebsd/sys/dev/rtwn/rtl8192c/pci/r92ce_attach.c +++ b/freebsd/sys/dev/rtwn/rtl8192c/pci/r92ce_attach.c @@ -174,6 +174,7 @@ r92ce_attach(struct rtwn_pci_softc *pc) sc->sc_dump_tx_desc = r92ce_dump_tx_desc; sc->sc_tx_radiotap_flags = r92c_tx_radiotap_flags; sc->sc_rx_radiotap_flags = r92c_rx_radiotap_flags; + sc->sc_get_rx_stats = r92c_get_rx_stats; sc->sc_get_rssi_cck = r92c_get_rssi_cck; sc->sc_get_rssi_ofdm = r92c_get_rssi_ofdm; sc->sc_classify_intr = r92ce_classify_intr; @@ -192,6 +193,7 @@ r92ce_attach(struct rtwn_pci_softc *pc) sc->sc_fw_reset = r92ce_fw_reset; sc->sc_fw_download_enable = r92c_fw_download_enable; #endif + sc->sc_llt_init = r92c_llt_init; sc->sc_set_page_size = r92c_set_page_size; sc->sc_lc_calib = r92c_lc_calib; sc->sc_iq_calib = r92ce_iq_calib; diff --git a/freebsd/sys/dev/rtwn/rtl8192c/r92c.h b/freebsd/sys/dev/rtwn/rtl8192c/r92c.h index 2b63179e..5ac666d0 100644 --- a/freebsd/sys/dev/rtwn/rtl8192c/r92c.h +++ b/freebsd/sys/dev/rtwn/rtl8192c/r92c.h @@ -77,6 +77,7 @@ void r92c_handle_c2h_report(void *); /* r92c_init.c */ int r92c_check_condition(struct rtwn_softc *, const uint8_t[]); +int r92c_llt_init(struct rtwn_softc *); int r92c_set_page_size(struct rtwn_softc *); void r92c_init_bb_common(struct rtwn_softc *); int r92c_init_rf_chain(struct rtwn_softc *, @@ -87,6 +88,9 @@ void r92c_init_ampdu(struct rtwn_softc *); void r92c_init_antsel(struct rtwn_softc *); void r92c_pa_bias_init(struct rtwn_softc *); +/* r92c_llt.c */ +int r92c_llt_write(struct rtwn_softc *, uint32_t, uint32_t); + /* r92c_rf.c */ uint32_t r92c_rf_read(struct rtwn_softc *, int, uint8_t); void r92c_rf_write(struct rtwn_softc *, int, uint8_t, uint32_t); @@ -99,6 +103,8 @@ void r92c_parse_rom(struct rtwn_softc *, uint8_t *); int8_t r92c_get_rssi_cck(struct rtwn_softc *, void *); int8_t r92c_get_rssi_ofdm(struct rtwn_softc *, void *); 
uint8_t r92c_rx_radiotap_flags(const void *); +void r92c_get_rx_stats(struct rtwn_softc *, struct ieee80211_rx_stats *, + const void *, const void *); /* r92c_tx.c */ void r92c_tx_enable_ampdu(void *, int); diff --git a/freebsd/sys/dev/rtwn/rtl8192c/r92c_fw.c b/freebsd/sys/dev/rtwn/rtl8192c/r92c_fw.c index 74c7d205..91bcfc0e 100644 --- a/freebsd/sys/dev/rtwn/rtl8192c/r92c_fw.c +++ b/freebsd/sys/dev/rtwn/rtl8192c/r92c_fw.c @@ -82,7 +82,7 @@ r92c_fw_cmd(struct rtwn_softc *sc, uint8_t id, const void *buf, int len) } /* Wait for current FW box to be empty. */ - for (ntries = 0; ntries < 50; ntries++) { + for (ntries = 0; ntries < 100; ntries++) { if (!(rtwn_read_1(sc, R92C_HMETFR) & (1 << sc->fwcur))) break; rtwn_delay(sc, 2000); diff --git a/freebsd/sys/dev/rtwn/rtl8192c/r92c_init.c b/freebsd/sys/dev/rtwn/rtl8192c/r92c_init.c index d8db0286..4ec44045 100644 --- a/freebsd/sys/dev/rtwn/rtl8192c/r92c_init.c +++ b/freebsd/sys/dev/rtwn/rtl8192c/r92c_init.c @@ -91,6 +91,32 @@ r92c_check_condition(struct rtwn_softc *sc, const uint8_t cond[]) return (0); } +int +r92c_llt_init(struct rtwn_softc *sc) +{ + int i, error; + + /* Reserve pages [0; page_count]. */ + for (i = 0; i < sc->page_count; i++) { + if ((error = r92c_llt_write(sc, i, i + 1)) != 0) + return (error); + } + /* NB: 0xff indicates end-of-list. */ + if ((error = r92c_llt_write(sc, i, 0xff)) != 0) + return (error); + /* + * Use pages [page_count + 1; pktbuf_count - 1] + * as ring buffer. + */ + for (++i; i < sc->pktbuf_count - 1; i++) { + if ((error = r92c_llt_write(sc, i, i + 1)) != 0) + return (error); + } + /* Make the last page point to the beginning of the ring buffer. 
*/ + error = r92c_llt_write(sc, i, sc->page_count + 1); + return (error); +} + int r92c_set_page_size(struct rtwn_softc *sc) { diff --git a/freebsd/sys/dev/rtwn/rtl8192c/r92c_reg.h b/freebsd/sys/dev/rtwn/rtl8192c/r92c_reg.h index ff03d191..34a4b80c 100644 --- a/freebsd/sys/dev/rtwn/rtl8192c/r92c_reg.h +++ b/freebsd/sys/dev/rtwn/rtl8192c/r92c_reg.h @@ -66,6 +66,7 @@ #define R92C_HSIMR 0x058 #define R92C_HSISR 0x05c #define R92C_MULTI_FUNC_CTRL 0x068 +#define R92C_LDO_SWR_CTRL 0x07c #define R92C_MCUFWDL 0x080 #define R92C_HMEBOX_EXT(idx) (0x088 + (idx) * 2) #define R92C_EFUSE_ACCESS 0x0cf @@ -115,6 +116,7 @@ #define R92C_TXDMA_OFFSET_CHK 0x20c #define R92C_TXDMA_STATUS 0x210 #define R92C_RQPN_NPQ 0x214 +#define R92C_AUTO_LLT 0x224 /* Rx DMA Configuration. */ #define R92C_RXDMA_AGG_PG_TH 0x280 #define R92C_RXPKT_NUM 0x284 @@ -297,6 +299,16 @@ #define R92C_SYS_CLKR_SYS_EN 0x00001000 #define R92C_SYS_CLKR_RING_EN 0x00002000 +/* Bits for R92C_RSV_CTRL. */ +#define R92C_RSV_CTRL_WLOCK_ALL 0x01 +#define R92C_RSV_CTRL_WLOCK_00 0x02 +#define R92C_RSV_CTRL_WLOCK_04 0x04 +#define R92C_RSV_CTRL_WLOCK_08 0x08 +#define R92C_RSV_CTRL_WLOCK_40 0x10 +#define R92C_RSV_CTRL_R_DIS_PRST_0 0x20 +#define R92C_RSV_CTRL_R_DIS_PRST_1 0x40 +#define R92C_RSV_CTRL_LOCK_ALL_EN 0x80 + /* Bits for R92C_RF_CTRL. */ #define R92C_RF_CTRL_EN 0x01 #define R92C_RF_CTRL_RSTB 0x02 @@ -339,6 +351,9 @@ /* Bits for R92C_LEDCFG0. */ #define R92C_LEDCFG0_DIS 0x08 +/* Bits for R92C_LEDCFG1. */ +#define R92C_LEDCFG1_DIS 0x80 + /* Bits for R92C_MULTI_FUNC_CTRL. */ #define R92C_MULTI_BT_FUNC_EN 0x00040000 @@ -420,6 +435,7 @@ #define R92C_PBP_1024 4 /* Bits for R92C_TRXDMA_CTRL. */ +#define R92C_TRXDMA_CTRL_RX_SHIFT_EN 0x0002 #define R92C_TRXDMA_CTRL_RXDMA_AGG_EN 0x0004 #define R92C_TRXDMA_CTRL_TXDMA_VOQ_MAP_M 0x0030 #define R92C_TRXDMA_CTRL_TXDMA_VOQ_MAP_S 4 @@ -476,6 +492,9 @@ /* Bits for R92C_TXDMA_OFFSET_CHK. */ #define R92C_TXDMA_OFFSET_DROP_DATA_EN 0x00000200 +/* Bits for R92C_AUTO_LLT. 
*/ +#define R92C_AUTO_LLT_INIT 0x00010000 + /* Bits for R92C_FWHW_TXQ_CTRL. */ #define R92C_FWHW_TXQ_CTRL_AMPDU_RTY_NEW 0x80 #define R92C_FWHW_TXQ_CTRL_REAL_BEACON 0x400000 @@ -593,7 +612,8 @@ #define R92C_RCR_APPFCS 0x80000000 /* Bits for R92C_RX_DRVINFO_SZ. */ -#define R92C_RX_DRVINFO_SZ_DEF 4 /* XXX other values will not work */ +/* XXX other values will not work */ +#define R92C_RX_DRVINFO_SZ_DEF ((RTWN_PHY_STATUS_SIZE) / 8) /* Bits for R92C_WMAC_TRXPTCL_CTL. */ #define R92C_WMAC_TRXPTCL_SHPRE 0x00020000 @@ -681,6 +701,7 @@ #define R92C_OFDM0_TXIQIMBALANCE(chain) (0xc80 + (chain) * 8) #define R92C_OFDM0_TXAFE(chain) (0xc94 + (chain) * 8) #define R92C_OFDM0_RXIQEXTANTA 0xca0 +#define R92C_OFDM0_TXPSEUDONOISEWGT 0xce4 #define R92C_OFDM1_LSTF 0xd00 /* Bits for R92C_FPGA[01]_RFMOD. */ @@ -800,6 +821,9 @@ #define R92C_LSSI_READBACK_DATA_M 0x000fffff #define R92C_LSSI_READBACK_DATA_S 0 +/* Bits for R92C_CCK0_SYSTEM. */ +#define R92C_CCK0_SYSTEM_CCK_SIDEBAND 0x00000010 + /* Bits for R92C_OFDM0_AGCCORE1(i). 
*/ #define R92C_OFDM0_AGCCORE1_GAIN_M 0x0000007f #define R92C_OFDM0_AGCCORE1_GAIN_S 0 diff --git a/freebsd/sys/dev/rtwn/rtl8192c/r92c_rx.c b/freebsd/sys/dev/rtwn/rtl8192c/r92c_rx.c index b77c76f6..70dff0f6 100644 --- a/freebsd/sys/dev/rtwn/rtl8192c/r92c_rx.c +++ b/freebsd/sys/dev/rtwn/rtl8192c/r92c_rx.c @@ -102,3 +102,47 @@ r92c_rx_radiotap_flags(const void *buf) flags = IEEE80211_RADIOTAP_F_SHORTGI; return (flags); } + +void +r92c_get_rx_stats(struct rtwn_softc *sc, struct ieee80211_rx_stats *rxs, + const void *desc, const void *physt_ptr) +{ + const struct r92c_rx_stat *stat = desc; + uint32_t rxdw1, rxdw3; + uint8_t rate; + + rxdw1 = le32toh(stat->rxdw1); + rxdw3 = le32toh(stat->rxdw3); + rate = MS(rxdw3, R92C_RXDW3_RATE); + + if (rxdw1 & R92C_RXDW1_AMPDU) + rxs->c_pktflags |= IEEE80211_RX_F_AMPDU; + else if (rxdw1 & R92C_RXDW1_AMPDU_MORE) + rxs->c_pktflags |= IEEE80211_RX_F_AMPDU_MORE; + if ((rxdw3 & R92C_RXDW3_SPLCP) && rate >= RTWN_RIDX_MCS(0)) + rxs->c_pktflags |= IEEE80211_RX_F_SHORTGI; + + if (rxdw3 & R92C_RXDW3_HT40) + rxs->c_width = IEEE80211_RX_FW_40MHZ; + else + rxs->c_width = IEEE80211_RX_FW_20MHZ; + + if (RTWN_RATE_IS_CCK(rate)) + rxs->c_phytype = IEEE80211_RX_FP_11B; + else if (rate < RTWN_RIDX_MCS(0)) + rxs->c_phytype = IEEE80211_RX_FP_11G; + else + rxs->c_phytype = IEEE80211_RX_FP_11NG; + + /* Map HW rate index to 802.11 rate. */ + if (rate < RTWN_RIDX_MCS(0)) { + rxs->c_rate = ridx2rate[rate]; + if (RTWN_RATE_IS_CCK(rate)) + rxs->c_pktflags |= IEEE80211_RX_F_CCK; + else + rxs->c_pktflags |= IEEE80211_RX_F_OFDM; + } else { /* MCS0~15. 
*/ + rxs->c_rate = IEEE80211_RATE_MCS | (rate - 12); + rxs->c_pktflags |= IEEE80211_RX_F_HT; + } +} diff --git a/freebsd/sys/dev/rtwn/rtl8192c/r92c_rx_desc.h b/freebsd/sys/dev/rtwn/rtl8192c/r92c_rx_desc.h index 7fec70be..12dfd665 100644 --- a/freebsd/sys/dev/rtwn/rtl8192c/r92c_rx_desc.h +++ b/freebsd/sys/dev/rtwn/rtl8192c/r92c_rx_desc.h @@ -45,6 +45,9 @@ struct r92c_rx_stat { uint32_t rxdw1; #define R92C_RXDW1_MACID_M 0x0000001f #define R92C_RXDW1_MACID_S 0 +#define R92C_RXDW1_AMSDU 0x00002000 +#define R92C_RXDW1_AMPDU_MORE 0x00004000 +#define R92C_RXDW1_AMPDU 0x00008000 #define R92C_RXDW1_MC 0x40000000 #define R92C_RXDW1_BC 0x80000000 @@ -56,6 +59,8 @@ struct r92c_rx_stat { #define R92C_RXDW3_SPLCP 0x00000100 #define R92C_RXDW3_HT40 0x00000200 #define R92C_RXDW3_HTC 0x00000400 +#define R92C_RXDW3_BSSID_FIT_M 0x00003000 +#define R92C_RXDW3_BSSID_FIT_S 12 uint32_t rxdw4; uint32_t tsf_low; diff --git a/freebsd/sys/dev/rtwn/rtl8192c/r92c_tx_desc.h b/freebsd/sys/dev/rtwn/rtl8192c/r92c_tx_desc.h index 037ac0e2..c3bc87ca 100644 --- a/freebsd/sys/dev/rtwn/rtl8192c/r92c_tx_desc.h +++ b/freebsd/sys/dev/rtwn/rtl8192c/r92c_tx_desc.h @@ -68,7 +68,7 @@ struct r92c_tx_desc { uint16_t txdseq; uint32_t txdw4; -#define R92C_TXDW4_RTSRATE_M 0x0000003f +#define R92C_TXDW4_RTSRATE_M 0x0000001f #define R92C_TXDW4_RTSRATE_S 0 #define R92C_TXDW4_SEQ_SEL_M 0x00000040 #define R92C_TXDW4_SEQ_SEL_S 6 diff --git a/freebsd/sys/dev/rtwn/rtl8192c/usb/r92cu_attach.c b/freebsd/sys/dev/rtwn/rtl8192c/usb/r92cu_attach.c index ce3f7a1a..aa6f7067 100644 --- a/freebsd/sys/dev/rtwn/rtl8192c/usb/r92cu_attach.c +++ b/freebsd/sys/dev/rtwn/rtl8192c/usb/r92cu_attach.c @@ -167,6 +167,7 @@ r92cu_attach(struct rtwn_usb_softc *uc) sc->sc_dump_tx_desc = r92cu_dump_tx_desc; sc->sc_tx_radiotap_flags = r92c_tx_radiotap_flags; sc->sc_rx_radiotap_flags = r92c_rx_radiotap_flags; + sc->sc_get_rx_stats = r92c_get_rx_stats; sc->sc_get_rssi_cck = r92c_get_rssi_cck; sc->sc_get_rssi_ofdm = r92c_get_rssi_ofdm; 
sc->sc_classify_intr = r92cu_classify_intr; @@ -185,6 +186,7 @@ r92cu_attach(struct rtwn_usb_softc *uc) sc->sc_fw_reset = r92c_fw_reset; sc->sc_fw_download_enable = r92c_fw_download_enable; #endif + sc->sc_llt_init = r92c_llt_init; sc->sc_set_page_size = r92c_set_page_size; sc->sc_lc_calib = r92c_lc_calib; sc->sc_iq_calib = r92c_iq_calib; /* XXX TODO */ diff --git a/freebsd/sys/dev/rtwn/rtl8812a/r12a.h b/freebsd/sys/dev/rtwn/rtl8812a/r12a.h index ec1d61e1..e8de45aa 100644 --- a/freebsd/sys/dev/rtwn/rtl8812a/r12a.h +++ b/freebsd/sys/dev/rtwn/rtl8812a/r12a.h @@ -128,6 +128,8 @@ void r12a_ratectl_tx_complete(struct rtwn_softc *, uint8_t *, int); void r12a_handle_c2h_report(struct rtwn_softc *, uint8_t *, int); int r12a_check_frame_checksum(struct rtwn_softc *, struct mbuf *); uint8_t r12a_rx_radiotap_flags(const void *); +void r12a_get_rx_stats(struct rtwn_softc *, struct ieee80211_rx_stats *, + const void *, const void *); /* r12a_tx.c */ void r12a_fill_tx_desc(struct rtwn_softc *, struct ieee80211_node *, diff --git a/freebsd/sys/dev/rtwn/rtl8812a/r12a_beacon.c b/freebsd/sys/dev/rtwn/rtl8812a/r12a_beacon.c index 37b1a183..67714442 100644 --- a/freebsd/sys/dev/rtwn/rtl8812a/r12a_beacon.c +++ b/freebsd/sys/dev/rtwn/rtl8812a/r12a_beacon.c @@ -79,6 +79,8 @@ r12a_beacon_init(struct rtwn_softc *sc, void *buf, int id) txd->txdw3 = htole32(R12A_TXDW3_DRVRATE); txd->txdw3 |= htole32(SM(R12A_TXDW3_SEQ_SEL, id)); + txd->txdw4 = htole32(SM(R12A_TXDW4_DATARATE, RTWN_RIDX_CCK1)); + txd->txdw6 = htole32(SM(R21A_TXDW6_MBSSID, id)); } diff --git a/freebsd/sys/dev/rtwn/rtl8812a/r12a_fw.c b/freebsd/sys/dev/rtwn/rtl8812a/r12a_fw.c index 12a3d855..f3bbc099 100644 --- a/freebsd/sys/dev/rtwn/rtl8812a/r12a_fw.c +++ b/freebsd/sys/dev/rtwn/rtl8812a/r12a_fw.c @@ -70,14 +70,14 @@ void r12a_fw_reset(struct rtwn_softc *sc, int reason) { /* Reset MCU IO wrapper. 
*/ - rtwn_setbits_1(sc, R92C_RSV_CTRL, 0x02, 0); + rtwn_setbits_1(sc, R92C_RSV_CTRL, R92C_RSV_CTRL_WLOCK_00, 0); rtwn_setbits_1(sc, R92C_RSV_CTRL + 1, 0x08, 0); rtwn_setbits_1_shift(sc, R92C_SYS_FUNC_EN, R92C_SYS_FUNC_EN_CPUEN, 0, 1); /* Enable MCU IO wrapper. */ - rtwn_setbits_1(sc, R92C_RSV_CTRL, 0x02, 0); + rtwn_setbits_1(sc, R92C_RSV_CTRL, R92C_RSV_CTRL_WLOCK_00, 0); rtwn_setbits_1(sc, R92C_RSV_CTRL + 1, 0, 0x08); rtwn_setbits_1_shift(sc, R92C_SYS_FUNC_EN, diff --git a/freebsd/sys/dev/rtwn/rtl8812a/r12a_rx.c b/freebsd/sys/dev/rtwn/rtl8812a/r12a_rx.c index 049717a4..b9c3bbf8 100644 --- a/freebsd/sys/dev/rtwn/rtl8812a/r12a_rx.c +++ b/freebsd/sys/dev/rtwn/rtl8812a/r12a_rx.c @@ -231,10 +231,99 @@ r12a_rx_radiotap_flags(const void *buf) if (!(stat->rxdw4 & htole32(R12A_RXDW4_SPLCP))) return (0); - rate = MS(le32toh(stat->rxdw3), R92C_RXDW3_RATE); + rate = MS(le32toh(stat->rxdw3), R12A_RXDW3_RATE); if (RTWN_RATE_IS_CCK(rate)) flags = IEEE80211_RADIOTAP_F_SHORTPRE; else flags = IEEE80211_RADIOTAP_F_SHORTGI; return (flags); } + +void +r12a_get_rx_stats(struct rtwn_softc *sc, struct ieee80211_rx_stats *rxs, + const void *desc, const void *physt_ptr) +{ + const struct r92c_rx_stat *stat = desc; + const struct r12a_rx_phystat *physt = physt_ptr; + uint32_t rxdw0, rxdw1, rxdw3, rxdw4; + uint8_t rate; + + rxdw0 = le32toh(stat->rxdw0); + rxdw1 = le32toh(stat->rxdw1); + rxdw3 = le32toh(stat->rxdw3); + rxdw4 = le32toh(stat->rxdw4); + rate = MS(rxdw3, R12A_RXDW3_RATE); + + /* TODO: STBC */ + if (rxdw4 & R12A_RXDW4_LDPC) + rxs->c_pktflags |= IEEE80211_RX_F_LDPC; + if (rxdw1 & R12A_RXDW1_AMPDU) { + if (rxdw0 & R92C_RXDW0_PHYST) + rxs->c_pktflags |= IEEE80211_RX_F_AMPDU; + else + rxs->c_pktflags |= IEEE80211_RX_F_AMPDU_MORE; + } + + if ((rxdw4 & R12A_RXDW4_SPLCP) && rate >= RTWN_RIDX_MCS(0)) + rxs->c_pktflags |= IEEE80211_RX_F_SHORTGI; + + switch (MS(rxdw4, R12A_RXDW4_BW)) { + case R12A_RXDW4_BW20: + rxs->c_width = IEEE80211_RX_FW_20MHZ; + break; + case R12A_RXDW4_BW40: + 
rxs->c_width = IEEE80211_RX_FW_40MHZ; + break; + case R12A_RXDW4_BW80: + rxs->c_width = IEEE80211_RX_FW_80MHZ; + break; + default: + break; + } + + if (RTWN_RATE_IS_CCK(rate)) + rxs->c_phytype = IEEE80211_RX_FP_11B; + else { + int is5ghz; + + /* XXX magic */ + /* XXX check with RTL8812AU */ + is5ghz = (physt->cfosho[2] != 0x01); + + if (rate < RTWN_RIDX_MCS(0)) { + if (is5ghz) + rxs->c_phytype = IEEE80211_RX_FP_11A; + else + rxs->c_phytype = IEEE80211_RX_FP_11G; + } else { + if (is5ghz) + rxs->c_phytype = IEEE80211_RX_FP_11NA; + else + rxs->c_phytype = IEEE80211_RX_FP_11NG; + } + } + + /* Map HW rate index to 802.11 rate. */ + if (rate < RTWN_RIDX_MCS(0)) { + rxs->c_rate = ridx2rate[rate]; + if (RTWN_RATE_IS_CCK(rate)) + rxs->c_pktflags |= IEEE80211_RX_F_CCK; + else + rxs->c_pktflags |= IEEE80211_RX_F_OFDM; + } else { /* MCS0~15. */ + /* TODO: VHT rates */ + rxs->c_rate = IEEE80211_RATE_MCS | (rate - 12); + rxs->c_pktflags |= IEEE80211_RX_F_HT; + } + + /* + * XXX always zero for RTL8821AU + * (vendor driver does not check this field) + */ +#if 0 + rxs->r_flags |= IEEE80211_R_IEEE | IEEE80211_R_FREQ; + rxs->c_ieee = MS(le16toh(physt->phyw1), R12A_PHYW1_CHAN); + rxs->c_freq = ieee80211_ieee2mhz(rxs->c_ieee, + (rxs->c_ieee < 36) ? IEEE80211_CHAN_2GHZ : IEEE80211_CHAN_5GHZ); +#endif +} diff --git a/freebsd/sys/dev/rtwn/rtl8812a/r12a_rx_desc.h b/freebsd/sys/dev/rtwn/rtl8812a/r12a_rx_desc.h index 8642ca85..c3d19527 100644 --- a/freebsd/sys/dev/rtwn/rtl8812a/r12a_rx_desc.h +++ b/freebsd/sys/dev/rtwn/rtl8812a/r12a_rx_desc.h @@ -34,18 +34,26 @@ /* Rx MAC descriptor defines (chip-specific). 
*/ /* Rx dword 1 */ #define R12A_RXDW1_AMSDU 0x00002000 +#define R12A_RXDW1_AMPDU 0x00008000 #define R12A_RXDW1_CKSUM_ERR 0x00100000 #define R12A_RXDW1_IPV6 0x00200000 #define R12A_RXDW1_UDP 0x00400000 #define R12A_RXDW1_CKSUM 0x00800000 /* Rx dword 2 */ #define R12A_RXDW2_RPT_C2H 0x10000000 +/* Rx dword 3 */ +#define R12A_RXDW3_RATE_M 0x0000007f +#define R12A_RXDW3_RATE_S 0 /* Rx dword 4 */ #define R12A_RXDW4_SPLCP 0x00000001 #define R12A_RXDW4_LDPC 0x00000002 #define R12A_RXDW4_STBC 0x00000004 #define R12A_RXDW4_BW_M 0x00000030 #define R12A_RXDW4_BW_S 4 +#define R12A_RXDW4_BW20 0 +#define R12A_RXDW4_BW40 1 +#define R12A_RXDW4_BW80 2 +#define R12A_RXDW4_BW160 3 /* Rx PHY descriptor. */ struct r12a_rx_phystat { diff --git a/freebsd/sys/dev/rtwn/rtl8812a/r12a_tx.c b/freebsd/sys/dev/rtwn/rtl8812a/r12a_tx.c index f7bd3a8e..40d54634 100644 --- a/freebsd/sys/dev/rtwn/rtl8812a/r12a_tx.c +++ b/freebsd/sys/dev/rtwn/rtl8812a/r12a_tx.c @@ -216,6 +216,17 @@ r12a_tx_set_sgi(struct rtwn_softc *sc, void *buf, struct ieee80211_node *ni) txd->txdw5 |= htole32(R12A_TXDW5_DATA_SHORT); } +static void +r12a_tx_set_ldpc(struct rtwn_softc *sc, struct r12a_tx_desc *txd, + struct ieee80211_node *ni) +{ + struct ieee80211vap *vap = ni->ni_vap; + + if ((vap->iv_flags_ht & IEEE80211_FHT_LDPC_TX) && + (ni->ni_htcap & IEEE80211_HTCAP_LDPC)) + txd->txdw5 |= htole32(R12A_TXDW5_DATA_LDPC); +} + void r12a_fill_tx_desc(struct rtwn_softc *sc, struct ieee80211_node *ni, struct mbuf *m, void *buf, uint8_t ridx, int maxretry) @@ -286,6 +297,7 @@ r12a_fill_tx_desc(struct rtwn_softc *sc, struct ieee80211_node *ni, if (ridx >= RTWN_RIDX_MCS(0)) { r12a_tx_set_ht40(sc, txd, ni); r12a_tx_set_sgi(sc, txd, ni); + r12a_tx_set_ldpc(sc, txd, ni); prot = ic->ic_htprotmode; } else if (ic->ic_flags & IEEE80211_F_USEPROT) prot = ic->ic_protmode; diff --git a/freebsd/sys/dev/rtwn/rtl8812a/usb/r12au_attach.c b/freebsd/sys/dev/rtwn/rtl8812a/usb/r12au_attach.c index 684076eb..97d966f0 100644 --- 
a/freebsd/sys/dev/rtwn/rtl8812a/usb/r12au_attach.c +++ b/freebsd/sys/dev/rtwn/rtl8812a/usb/r12au_attach.c @@ -170,7 +170,15 @@ r12a_read_chipid_vendor(struct rtwn_softc *sc, uint32_t reg_sys_cfg) static void r12au_adj_devcaps(struct rtwn_softc *sc) { - /* TODO: LDPC, STBC etc */ + struct r12a_softc *rs = sc->sc_priv; + struct ieee80211com *ic = &sc->sc_ic; + + if (rs->chip & R12A_CHIP_C_CUT) { + ic->ic_htcaps |= IEEE80211_HTCAP_LDPC | + IEEE80211_HTC_TXLDPC; + } + + /* TODO: STBC, VHT etc */ } void @@ -192,6 +200,7 @@ r12au_attach(struct rtwn_usb_softc *uc) sc->sc_dump_tx_desc = r12au_dump_tx_desc; sc->sc_tx_radiotap_flags = r12a_tx_radiotap_flags; sc->sc_rx_radiotap_flags = r12a_rx_radiotap_flags; + sc->sc_get_rx_stats = r12a_get_rx_stats; sc->sc_get_rssi_cck = r88e_get_rssi_cck; sc->sc_get_rssi_ofdm = r88e_get_rssi_ofdm; sc->sc_classify_intr = r12au_classify_intr; @@ -208,6 +217,7 @@ r12au_attach(struct rtwn_usb_softc *uc) sc->sc_fw_reset = r12a_fw_reset; sc->sc_fw_download_enable = r12a_fw_download_enable; #endif + sc->sc_llt_init = r92c_llt_init; sc->sc_set_page_size = r12a_set_page_size; sc->sc_lc_calib = r12a_lc_calib; sc->sc_iq_calib = r12a_iq_calib; diff --git a/freebsd/sys/dev/rtwn/rtl8821a/r21a_init.c b/freebsd/sys/dev/rtwn/rtl8821a/r21a_init.c index a3bcde77..e2c3972f 100644 --- a/freebsd/sys/dev/rtwn/rtl8821a/r21a_init.c +++ b/freebsd/sys/dev/rtwn/rtl8821a/r21a_init.c @@ -176,7 +176,7 @@ r21a_power_on(struct rtwn_softc *sc) R92C_CR_CALTMR_EN)); if (rtwn_read_4(sc, R92C_SYS_CFG) & R92C_SYS_CFG_TRP_BT_EN) - RTWN_CHK(rtwn_setbits_1(sc, 0x07C, 0, 0x40)); + RTWN_CHK(rtwn_setbits_1(sc, R92C_LDO_SWR_CTRL, 0, 0x40)); return (0); #undef RTWN_CHK diff --git a/freebsd/sys/dev/rtwn/rtl8821a/usb/r21au_attach.c b/freebsd/sys/dev/rtwn/rtl8821a/usb/r21au_attach.c index 6f7129f8..145aca21 100644 --- a/freebsd/sys/dev/rtwn/rtl8821a/usb/r21au_attach.c +++ b/freebsd/sys/dev/rtwn/rtl8821a/usb/r21au_attach.c @@ -159,10 +159,11 @@ r21au_adj_devcaps(struct rtwn_softc *sc) 
struct ieee80211com *ic = &sc->sc_ic; struct r12a_softc *rs = sc->sc_priv; + ic->ic_htcaps |= IEEE80211_HTC_TXLDPC; if (rs->rs_radar != 0) ic->ic_caps |= IEEE80211_C_DFS; - /* TODO: LDPC etc */ + /* TODO: VHT */ } void @@ -184,6 +185,7 @@ r21au_attach(struct rtwn_usb_softc *uc) sc->sc_dump_tx_desc = r12au_dump_tx_desc; sc->sc_tx_radiotap_flags = r12a_tx_radiotap_flags; sc->sc_rx_radiotap_flags = r12a_rx_radiotap_flags; + sc->sc_get_rx_stats = r12a_get_rx_stats; sc->sc_get_rssi_cck = r21a_get_rssi_cck; sc->sc_get_rssi_ofdm = r88e_get_rssi_ofdm; sc->sc_classify_intr = r12au_classify_intr; @@ -201,6 +203,7 @@ r21au_attach(struct rtwn_usb_softc *uc) sc->sc_fw_reset = r21a_fw_reset; sc->sc_fw_download_enable = r12a_fw_download_enable; #endif + sc->sc_llt_init = r92c_llt_init; sc->sc_set_page_size = rtwn_nop_int_softc; sc->sc_lc_calib = rtwn_nop_softc; /* XXX not used */ sc->sc_iq_calib = r12a_iq_calib; diff --git a/freebsd/sys/dev/rtwn/usb/rtwn_usb_attach.h b/freebsd/sys/dev/rtwn/usb/rtwn_usb_attach.h index 48a4d6e5..ee6d9137 100644 --- a/freebsd/sys/dev/rtwn/usb/rtwn_usb_attach.h +++ b/freebsd/sys/dev/rtwn/usb/rtwn_usb_attach.h @@ -21,12 +21,14 @@ */ void r92cu_attach(struct rtwn_usb_softc *); +void r92eu_attach(struct rtwn_usb_softc *); void r88eu_attach(struct rtwn_usb_softc *); void r12au_attach(struct rtwn_usb_softc *); void r21au_attach(struct rtwn_usb_softc *); enum { RTWN_CHIP_RTL8192CU, + RTWN_CHIP_RTL8192EU, RTWN_CHIP_RTL8188EU, RTWN_CHIP_RTL8812AU, RTWN_CHIP_RTL8821AU, @@ -92,7 +94,6 @@ static const STRUCT_USB_HOST_ID rtwn_devs[] = { RTWN_RTL8192CU_DEV(REALTEK, RTL8191CU), RTWN_RTL8192CU_DEV(REALTEK, RTL8192CE), RTWN_RTL8192CU_DEV(REALTEK, RTL8192CU), - RTWN_RTL8192CU_DEV(REALTEK, RTL8192CU_1), RTWN_RTL8192CU_DEV(SITECOMEU, RTL8188CU_1), RTWN_RTL8192CU_DEV(SITECOMEU, RTL8188CU_2), RTWN_RTL8192CU_DEV(SITECOMEU, RTL8192CU), @@ -101,6 +102,15 @@ static const STRUCT_USB_HOST_ID rtwn_devs[] = { RTWN_RTL8192CU_DEV(ZYXEL, RTL8192CU), #undef RTWN_RTL8192CU_DEV + /* 
RTL8192EU */ +#define RTWN_RTL8192EU_DEV(v,p) \ + { USB_VPI(USB_VENDOR_##v, USB_PRODUCT_##v##_##p, RTWN_CHIP_RTL8192EU) } + RTWN_RTL8192EU_DEV(DLINK, DWA131E1), + RTWN_RTL8192EU_DEV(REALTEK, RTL8192EU), + RTWN_RTL8192EU_DEV(TPLINK, WN822NV4), + RTWN_RTL8192EU_DEV(TPLINK, WN823NV2), +#undef RTWN_RTL8192EU_DEV + /* RTL8188EU */ #define RTWN_RTL8188EU_DEV(v,p) \ { USB_VPI(USB_VENDOR_##v, USB_PRODUCT_##v##_##p, RTWN_CHIP_RTL8188EU) } @@ -148,6 +158,7 @@ typedef void (*chip_usb_attach)(struct rtwn_usb_softc *); static const chip_usb_attach rtwn_chip_usb_attach[RTWN_CHIP_MAX_USB] = { [RTWN_CHIP_RTL8192CU] = r92cu_attach, + [RTWN_CHIP_RTL8192EU] = r92eu_attach, [RTWN_CHIP_RTL8188EU] = r88eu_attach, [RTWN_CHIP_RTL8812AU] = r12au_attach, [RTWN_CHIP_RTL8821AU] = r21au_attach diff --git a/freebsd/sys/dev/rtwn/usb/rtwn_usb_ep.c b/freebsd/sys/dev/rtwn/usb/rtwn_usb_ep.c index a1fafb46..b75aa3b9 100644 --- a/freebsd/sys/dev/rtwn/usb/rtwn_usb_ep.c +++ b/freebsd/sys/dev/rtwn/usb/rtwn_usb_ep.c @@ -60,7 +60,7 @@ __FBSDID("$FreeBSD$"); #include -static struct usb_config rtwn_config[RTWN_N_TRANSFER] = { +static const struct usb_config rtwn_config_common[RTWN_N_TRANSFER] = { [RTWN_BULK_RX] = { .type = UE_BULK, .endpoint = UE_ADDR_ANY, @@ -163,6 +163,7 @@ rtwn_usb_setup_queues(struct rtwn_usb_softc *uc) int rtwn_usb_setup_endpoints(struct rtwn_usb_softc *uc) { + struct usb_config *rtwn_config; struct rtwn_softc *sc = &uc->uc_sc; const uint8_t iface_index = RTWN_IFACE_INDEX; struct usb_endpoint *ep, *ep_end; @@ -199,6 +200,9 @@ rtwn_usb_setup_endpoints(struct rtwn_usb_softc *uc) return (EINVAL); } + rtwn_config = malloc(sizeof(rtwn_config_common), M_TEMP, M_WAITOK); + memcpy(rtwn_config, rtwn_config_common, sizeof(rtwn_config_common)); + /* NB: keep in sync with rtwn_dma_init(). 
*/ rtwn_config[RTWN_BULK_TX_VO].endpoint = addr[0]; switch (uc->ntx) { @@ -226,6 +230,8 @@ rtwn_usb_setup_endpoints(struct rtwn_usb_softc *uc) rtwn_config[RTWN_BULK_RX].bufsize = sc->rx_dma_size + 1024; error = usbd_transfer_setup(uc->uc_udev, &iface_index, uc->uc_xfer, rtwn_config, RTWN_N_TRANSFER, uc, &sc->sc_mtx); + free(rtwn_config, M_TEMP); + if (error) { device_printf(sc->sc_dev, "could not allocate USB transfers, " "err=%s\n", usbd_errstr(error)); diff --git a/freebsd/sys/dev/rtwn/usb/rtwn_usb_rx.c b/freebsd/sys/dev/rtwn/usb/rtwn_usb_rx.c index 8795e16d..4afa71af 100644 --- a/freebsd/sys/dev/rtwn/usb/rtwn_usb_rx.c +++ b/freebsd/sys/dev/rtwn/usb/rtwn_usb_rx.c @@ -238,7 +238,7 @@ rtwn_report_intr(struct rtwn_usb_softc *uc, struct usb_xfer *xfer, } static struct ieee80211_node * -rtwn_rx_frame(struct rtwn_softc *sc, struct mbuf *m, int8_t *rssi) +rtwn_rx_frame(struct rtwn_softc *sc, struct mbuf *m) { struct r92c_rx_stat stat; @@ -246,7 +246,7 @@ rtwn_rx_frame(struct rtwn_softc *sc, struct mbuf *m, int8_t *rssi) m_copydata(m, 0, sizeof(struct r92c_rx_stat), (caddr_t)&stat); m_adj(m, sizeof(struct r92c_rx_stat)); - return (rtwn_rx_common(sc, m, &stat, rssi)); + return (rtwn_rx_common(sc, m, &stat)); } void @@ -258,7 +258,6 @@ rtwn_bulk_rx_callback(struct usb_xfer *xfer, usb_error_t error) struct ieee80211_node *ni; struct mbuf *m = NULL, *next; struct rtwn_data *data; - int8_t nf, rssi; RTWN_ASSERT_LOCKED(sc); @@ -293,19 +292,15 @@ tr_setup: next = m->m_next; m->m_next = NULL; - ni = rtwn_rx_frame(sc, m, &rssi); + ni = rtwn_rx_frame(sc, m); RTWN_UNLOCK(sc); - nf = RTWN_NOISE_FLOOR; if (ni != NULL) { - if (ni->ni_flags & IEEE80211_NODE_HT) - m->m_flags |= M_AMPDU; - (void)ieee80211_input(ni, m, rssi - nf, nf); + (void)ieee80211_input_mimo(ni, m); ieee80211_free_node(ni); } else { - (void)ieee80211_input_all(ic, m, - rssi - nf, nf); + (void)ieee80211_input_mimo_all(ic, m); } RTWN_LOCK(sc); m = next; @@ -326,17 +321,6 @@ tr_setup: break; } finish: - /* Finished 
receive; age anything left on the FF queue by a little bump */ - /* - * XXX TODO: just make this a callout timer schedule so we can - * flush the FF staging queue if we're approaching idle. - */ -#ifdef IEEE80211_SUPPORT_SUPERG - if (!(sc->sc_flags & RTWN_FW_LOADED) || - sc->sc_ratectl != RTWN_RATECTL_NET80211) - rtwn_cmd_sleepable(sc, NULL, 0, rtwn_ff_flush_all); -#endif - /* Kick-start more transmit in case we stalled */ rtwn_start(sc); } diff --git a/freebsd/sys/dev/tsec/if_tsec.c b/freebsd/sys/dev/tsec/if_tsec.c index 5b94af9c..25802db0 100644 --- a/freebsd/sys/dev/tsec/if_tsec.c +++ b/freebsd/sys/dev/tsec/if_tsec.c @@ -39,6 +39,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -73,8 +74,8 @@ static int tsec_alloc_dma_desc(device_t dev, bus_dma_tag_t *dtag, bus_dmamap_t *dmap, bus_size_t dsize, void **vaddr, void *raddr, const char *dname); static void tsec_dma_ctl(struct tsec_softc *sc, int state); -static int tsec_encap(struct tsec_softc *sc, struct mbuf *m_head, - int fcb_inserted); +static void tsec_encap(struct ifnet *ifp, struct tsec_softc *sc, + struct mbuf *m0, uint16_t fcb_flags, int *start_tx); static void tsec_free_dma(struct tsec_softc *sc); static void tsec_free_dma_desc(bus_dma_tag_t dtag, bus_dmamap_t dmap, void *vaddr); static int tsec_ifmedia_upd(struct ifnet *ifp); @@ -123,8 +124,6 @@ tsec_attach(struct tsec_softc *sc) { uint8_t hwaddr[ETHER_ADDR_LEN]; struct ifnet *ifp; - bus_dmamap_t *map_ptr; - bus_dmamap_t **map_pptr; int error = 0; int i; @@ -181,7 +180,7 @@ tsec_attach(struct tsec_softc *sc) BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filtfunc, filtfuncarg */ MCLBYTES * (TSEC_TX_NUM_DESC - 1), /* maxsize */ - TSEC_TX_NUM_DESC - 1, /* nsegments */ + TSEC_TX_MAX_DMA_SEGS, /* nsegments */ MCLBYTES, 0, /* maxsegsz, flags */ NULL, NULL, /* lockfunc, lockfuncarg */ &sc->tsec_tx_mtag); /* dmat */ @@ -211,17 +210,15 @@ tsec_attach(struct tsec_softc *sc) } /* Create TX busdma maps */ - map_ptr = 
sc->tx_map_data; - map_pptr = sc->tx_map_unused_data; - for (i = 0; i < TSEC_TX_NUM_DESC; i++) { - map_pptr[i] = &map_ptr[i]; - error = bus_dmamap_create(sc->tsec_tx_mtag, 0, map_pptr[i]); + error = bus_dmamap_create(sc->tsec_tx_mtag, 0, + &sc->tx_bufmap[i].map); if (error) { device_printf(sc->dev, "failed to init TX ring\n"); tsec_detach(sc); return (ENXIO); } + sc->tx_bufmap[i].map_initialized = 1; } /* Create RX busdma maps and zero mbuf handlers */ @@ -370,13 +367,33 @@ tsec_init(void *xsc) TSEC_GLOBAL_UNLOCK(sc); } +static int +tsec_mii_wait(struct tsec_softc *sc, uint32_t flags) +{ + int timeout; + + /* + * The status indicators are not set immediatly after a command. + * Discard the first value. + */ + TSEC_PHY_READ(sc, TSEC_REG_MIIMIND); + + timeout = TSEC_READ_RETRY; + while ((TSEC_PHY_READ(sc, TSEC_REG_MIIMIND) & flags) && --timeout) + DELAY(TSEC_READ_DELAY); + + return (timeout == 0); +} + + static void tsec_init_locked(struct tsec_softc *sc) { struct tsec_desc *tx_desc = sc->tsec_tx_vaddr; struct tsec_desc *rx_desc = sc->tsec_rx_vaddr; struct ifnet *ifp = sc->tsec_ifp; - uint32_t timeout, val, i; + uint32_t val, i; + int timeout; if (ifp->if_drv_flags & IFF_DRV_RUNNING) return; @@ -435,15 +452,13 @@ tsec_init_locked(struct tsec_softc *sc) TSEC_PHY_WRITE(sc, TSEC_REG_MIIMCFG, TSEC_MIIMCFG_CLKDIV28); /* Step 8: Read MII Mgmt indicator register and check for Busy = 0 */ - timeout = TSEC_READ_RETRY; - while (--timeout && (TSEC_PHY_READ(sc, TSEC_REG_MIIMIND) & - TSEC_MIIMIND_BUSY)) - DELAY(TSEC_READ_DELAY); - if (timeout == 0) { + timeout = tsec_mii_wait(sc, TSEC_MIIMIND_BUSY); + + TSEC_PHY_UNLOCK(sc); + if (timeout) { if_printf(ifp, "tsec_init_locked(): Mgmt busy timeout\n"); return; } - TSEC_PHY_UNLOCK(sc); /* Step 9: Setup the MII Mgmt */ #ifdef __rtems__ @@ -724,124 +739,135 @@ static void tsec_start_locked(struct ifnet *ifp) { struct tsec_softc *sc; - struct mbuf *m0, *mtmp; + struct mbuf *m0; struct tsec_tx_fcb *tx_fcb; - unsigned int queued = 0; - int 
csum_flags, fcb_inserted = 0; + int csum_flags; + int start_tx; + uint16_t fcb_flags; sc = ifp->if_softc; + start_tx = 0; TSEC_TRANSMIT_LOCK_ASSERT(sc); - if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != - IFF_DRV_RUNNING) - return; - if (sc->tsec_link == 0) return; bus_dmamap_sync(sc->tsec_tx_dtag, sc->tsec_tx_dmap, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); - while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { + for (;;) { + + if (TSEC_FREE_TX_DESC(sc) < TSEC_TX_MAX_DMA_SEGS) { + /* No free descriptors */ + ifp->if_drv_flags |= IFF_DRV_OACTIVE; + break; + } + /* Get packet from the queue */ IFQ_DRV_DEQUEUE(&ifp->if_snd, m0); if (m0 == NULL) break; /* Insert TCP/IP Off-load frame control block */ + fcb_flags = 0; csum_flags = m0->m_pkthdr.csum_flags; if (csum_flags) { - M_PREPEND(m0, sizeof(struct tsec_tx_fcb), M_NOWAIT); if (m0 == NULL) break; - tx_fcb = mtod(m0, struct tsec_tx_fcb *); - tx_fcb->flags = 0; - tx_fcb->l3_offset = ETHER_HDR_LEN; - tx_fcb->l4_offset = sizeof(struct ip); - if (csum_flags & CSUM_IP) - tx_fcb->flags |= TSEC_TX_FCB_IP4 | + fcb_flags |= TSEC_TX_FCB_IP4 | TSEC_TX_FCB_CSUM_IP; if (csum_flags & CSUM_TCP) - tx_fcb->flags |= TSEC_TX_FCB_TCP | + fcb_flags |= TSEC_TX_FCB_TCP | TSEC_TX_FCB_CSUM_TCP_UDP; if (csum_flags & CSUM_UDP) - tx_fcb->flags |= TSEC_TX_FCB_UDP | + fcb_flags |= TSEC_TX_FCB_UDP | TSEC_TX_FCB_CSUM_TCP_UDP; - fcb_inserted = 1; + tx_fcb = mtod(m0, struct tsec_tx_fcb *); + tx_fcb->flags = fcb_flags; + tx_fcb->l3_offset = ETHER_HDR_LEN; + tx_fcb->l4_offset = sizeof(struct ip); } - mtmp = m_defrag(m0, M_NOWAIT); - if (mtmp) - m0 = mtmp; - - if (tsec_encap(sc, m0, fcb_inserted)) { - IFQ_DRV_PREPEND(&ifp->if_snd, m0); - ifp->if_drv_flags |= IFF_DRV_OACTIVE; - break; - } - queued++; - BPF_MTAP(ifp, m0); + tsec_encap(ifp, sc, m0, fcb_flags, &start_tx); } bus_dmamap_sync(sc->tsec_tx_dtag, sc->tsec_tx_dmap, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - if (queued) { + if (start_tx) { /* Enable transmitter and watchdog timer */ 
TSEC_WRITE(sc, TSEC_REG_TSTAT, TSEC_TSTAT_THLT); sc->tsec_watchdog = 5; } } -static int -tsec_encap(struct tsec_softc *sc, struct mbuf *m0, int fcb_inserted) +static void +tsec_encap(struct ifnet *ifp, struct tsec_softc *sc, struct mbuf *m0, + uint16_t fcb_flags, int *start_tx) { - struct tsec_desc *tx_desc = NULL; - struct ifnet *ifp; - bus_dma_segment_t segs[TSEC_TX_NUM_DESC]; - bus_dmamap_t *mapp; - int csum_flag = 0, error, seg, nsegs; + bus_dma_segment_t segs[TSEC_TX_MAX_DMA_SEGS]; + int error, i, nsegs; + struct tsec_bufmap *tx_bufmap; + uint32_t tx_idx; + uint16_t flags; TSEC_TRANSMIT_LOCK_ASSERT(sc); - ifp = sc->tsec_ifp; - - if (TSEC_FREE_TX_DESC(sc) == 0) { - /* No free descriptors */ - return (-1); - } - - /* Fetch unused map */ - mapp = TSEC_ALLOC_TX_MAP(sc); - + tx_idx = sc->tx_idx_head; + tx_bufmap = &sc->tx_bufmap[tx_idx]; + /* Create mapping in DMA memory */ - error = bus_dmamap_load_mbuf_sg(sc->tsec_tx_mtag, - *mapp, m0, segs, &nsegs, BUS_DMA_NOWAIT); - if (error != 0 || nsegs > TSEC_FREE_TX_DESC(sc) || nsegs <= 0) { - bus_dmamap_unload(sc->tsec_tx_mtag, *mapp); - TSEC_FREE_TX_MAP(sc, mapp); - return ((error != 0) ? error : -1); + error = bus_dmamap_load_mbuf_sg(sc->tsec_tx_mtag, tx_bufmap->map, m0, + segs, &nsegs, BUS_DMA_NOWAIT); + if (error == EFBIG) { + /* Too many segments! Defrag and try again. */ + struct mbuf *m = m_defrag(m0, M_NOWAIT); + + if (m == NULL) { + m_freem(m0); + return; + } + m0 = m; + error = bus_dmamap_load_mbuf_sg(sc->tsec_tx_mtag, + tx_bufmap->map, m0, segs, &nsegs, BUS_DMA_NOWAIT); + } + if (error != 0) { + /* Give up. 
*/ + m_freem(m0); + return; } - bus_dmamap_sync(sc->tsec_tx_mtag, *mapp, BUS_DMASYNC_PREWRITE); - - if ((ifp->if_flags & IFF_DEBUG) && (nsegs > 1)) - if_printf(ifp, "TX buffer has %d segments\n", nsegs); - - if (fcb_inserted) - csum_flag = TSEC_TXBD_TOE; - - /* Everything is ok, now we can send buffers */ - for (seg = 0; seg < nsegs; seg++) { - tx_desc = TSEC_GET_CUR_TX_DESC(sc); - tx_desc->length = segs[seg].ds_len; - tx_desc->bufptr = segs[seg].ds_addr; + bus_dmamap_sync(sc->tsec_tx_mtag, tx_bufmap->map, + BUS_DMASYNC_PREWRITE); + tx_bufmap->mbuf = m0; + + /* + * Fill in the TX descriptors back to front so that READY bit in first + * descriptor is set last. + */ + tx_idx = (tx_idx + (uint32_t)nsegs) & (TSEC_TX_NUM_DESC - 1); + sc->tx_idx_head = tx_idx; + flags = TSEC_TXBD_L | TSEC_TXBD_I | TSEC_TXBD_R | TSEC_TXBD_TC; + for (i = nsegs - 1; i >= 0; i--) { + struct tsec_desc *tx_desc; + + tx_idx = (tx_idx - 1) & (TSEC_TX_NUM_DESC - 1); + tx_desc = &sc->tsec_tx_vaddr[tx_idx]; + tx_desc->length = segs[i].ds_len; + tx_desc->bufptr = segs[i].ds_addr; + + if (i == 0) { + wmb(); + + if (fcb_flags != 0) + flags |= TSEC_TXBD_TOE; + } /* * Set flags: @@ -851,17 +877,14 @@ tsec_encap(struct tsec_softc *sc, struct mbuf *m0, int fcb_inserted) * - transmit the CRC sequence after the last data byte * - interrupt after the last buffer */ - tx_desc->flags = - (tx_desc->flags & TSEC_TXBD_W) | - ((seg == 0) ? csum_flag : 0) | TSEC_TXBD_R | TSEC_TXBD_TC | - ((seg == nsegs - 1) ? TSEC_TXBD_L | TSEC_TXBD_I : 0); - } + tx_desc->flags = (tx_idx == (TSEC_TX_NUM_DESC - 1) ? 
+ TSEC_TXBD_W : 0) | flags; - /* Save mbuf and DMA mapping for release at later stage */ - TSEC_PUT_TX_MBUF(sc, m0); - TSEC_PUT_TX_MAP(sc, mapp); + flags &= ~(TSEC_TXBD_L | TSEC_TXBD_I); + } - return (0); + BPF_MTAP(ifp, m0); + *start_tx = 1; } static void @@ -925,11 +948,8 @@ tsec_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct tsec_softc *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *)data; - device_t dev; int mask, error = 0; - dev = sc->dev; - switch (command) { case SIOCSIFMTU: TSEC_GLOBAL_LOCK(sc); @@ -1188,9 +1208,9 @@ tsec_free_dma(struct tsec_softc *sc) /* Free TX maps */ for (i = 0; i < TSEC_TX_NUM_DESC; i++) - if (sc->tx_map_data[i] != NULL) + if (sc->tx_bufmap[i].map_initialized) bus_dmamap_destroy(sc->tsec_tx_mtag, - sc->tx_map_data[i]); + sc->tx_bufmap[i].map); /* Destroy tag for TX mbufs */ bus_dma_tag_destroy(sc->tsec_tx_mtag); @@ -1225,8 +1245,6 @@ static void tsec_stop(struct tsec_softc *sc) { struct ifnet *ifp; - struct mbuf *m0; - bus_dmamap_t *mapp; uint32_t tmpval; TSEC_GLOBAL_LOCK_ASSERT(sc); @@ -1243,16 +1261,15 @@ tsec_stop(struct tsec_softc *sc) tsec_dma_ctl(sc, 0); /* Remove pending data from TX queue */ - while (!TSEC_EMPTYQ_TX_MBUF(sc)) { - m0 = TSEC_GET_TX_MBUF(sc); - mapp = TSEC_GET_TX_MAP(sc); - - bus_dmamap_sync(sc->tsec_tx_mtag, *mapp, + while (sc->tx_idx_tail != sc->tx_idx_head) { + bus_dmamap_sync(sc->tsec_tx_mtag, + sc->tx_bufmap[sc->tx_idx_tail].map, BUS_DMASYNC_POSTWRITE); - bus_dmamap_unload(sc->tsec_tx_mtag, *mapp); - - TSEC_FREE_TX_MAP(sc, mapp); - m_freem(m0); + bus_dmamap_unload(sc->tsec_tx_mtag, + sc->tx_bufmap[sc->tx_idx_tail].map); + m_freem(sc->tx_bufmap[sc->tx_idx_tail].mbuf); + sc->tx_idx_tail = (sc->tx_idx_tail + 1) + & (TSEC_TX_NUM_DESC - 1); } /* Disable RX and TX */ @@ -1304,7 +1321,6 @@ tsec_receive_intr_locked(struct tsec_softc *sc, int count) struct ifnet *ifp; struct rx_data_type *rx_data; struct mbuf *m; - device_t dev; uint32_t i; int c, rx_npkts; uint16_t flags; @@ -1313,7 +1329,6 
@@ tsec_receive_intr_locked(struct tsec_softc *sc, int count) ifp = sc->tsec_ifp; rx_data = sc->rx_data; - dev = sc->dev; rx_npkts = 0; bus_dmamap_sync(sc->tsec_rx_dtag, sc->tsec_rx_dmap, @@ -1381,7 +1396,7 @@ tsec_receive_intr_locked(struct tsec_softc *sc, int count) if (tsec_new_rxbuf(sc->tsec_rx_mtag, rx_data[i].map, &rx_data[i].mbuf, &rx_data[i].paddr)) { - if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); + if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); /* * We ran out of mbufs; didn't consume current * descriptor and have to return it to the queue. @@ -1451,11 +1466,8 @@ tsec_receive_intr(void *arg) static void tsec_transmit_intr_locked(struct tsec_softc *sc) { - struct tsec_desc *tx_desc; struct ifnet *ifp; - struct mbuf *m0; - bus_dmamap_t *mapp; - int send = 0; + uint32_t tx_idx; TSEC_TRANSMIT_LOCK_ASSERT(sc); @@ -1474,44 +1486,41 @@ tsec_transmit_intr_locked(struct tsec_softc *sc) bus_dmamap_sync(sc->tsec_tx_dtag, sc->tsec_tx_dmap, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); - while (TSEC_CUR_DIFF_DIRTY_TX_DESC(sc)) { - tx_desc = TSEC_GET_DIRTY_TX_DESC(sc); + tx_idx = sc->tx_idx_tail; + while (tx_idx != sc->tx_idx_head) { + struct tsec_desc *tx_desc; + struct tsec_bufmap *tx_bufmap; + + tx_desc = &sc->tsec_tx_vaddr[tx_idx]; if (tx_desc->flags & TSEC_TXBD_R) { - TSEC_BACK_DIRTY_TX_DESC(sc); break; } - if ((tx_desc->flags & TSEC_TXBD_L) == 0) + tx_bufmap = &sc->tx_bufmap[tx_idx]; + tx_idx = (tx_idx + 1) & (TSEC_TX_NUM_DESC - 1); + if (tx_bufmap->mbuf == NULL) continue; /* * This is the last buf in this packet, so unmap and free it. 
*/ - m0 = TSEC_GET_TX_MBUF(sc); - mapp = TSEC_GET_TX_MAP(sc); - - bus_dmamap_sync(sc->tsec_tx_mtag, *mapp, + bus_dmamap_sync(sc->tsec_tx_mtag, tx_bufmap->map, BUS_DMASYNC_POSTWRITE); - bus_dmamap_unload(sc->tsec_tx_mtag, *mapp); - - TSEC_FREE_TX_MAP(sc, mapp); - m_freem(m0); + bus_dmamap_unload(sc->tsec_tx_mtag, tx_bufmap->map); + m_freem(tx_bufmap->mbuf); + tx_bufmap->mbuf = NULL; if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); - send = 1; } + sc->tx_idx_tail = tx_idx; bus_dmamap_sync(sc->tsec_tx_dtag, sc->tsec_tx_dmap, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - if (send) { - /* Now send anything that was pending */ - ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; - tsec_start_locked(ifp); + ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; + tsec_start_locked(ifp); - /* Stop wathdog if all sent */ - if (TSEC_EMPTYQ_TX_MBUF(sc)) - sc->tsec_watchdog = 0; - } + if (sc->tx_idx_tail == sc->tx_idx_head) + sc->tsec_watchdog = 0; } void @@ -1562,13 +1571,9 @@ tsec_error_intr_locked(struct tsec_softc *sc, int count) TSEC_WRITE(sc, TSEC_REG_TSTAT, TSEC_TSTAT_THLT); } - /* Check receiver errors */ + /* Check for discarded frame due to a lack of buffers */ if (eflags & TSEC_IEVENT_BSY) { - if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); - - /* Get data from RX buffers */ - tsec_receive_intr_locked(sc, count); } if (ifp->if_flags & IFF_DEBUG) @@ -1602,7 +1607,7 @@ int tsec_miibus_readreg(device_t dev, int phy, int reg) { struct tsec_softc *sc; - uint32_t timeout; + int timeout; int rv; sc = device_get_softc(dev); @@ -1612,17 +1617,13 @@ tsec_miibus_readreg(device_t dev, int phy, int reg) TSEC_PHY_WRITE(sc, TSEC_REG_MIIMCOM, 0); TSEC_PHY_WRITE(sc, TSEC_REG_MIIMCOM, TSEC_MIIMCOM_READCYCLE); - timeout = TSEC_READ_RETRY; - while (--timeout && TSEC_PHY_READ(sc, TSEC_REG_MIIMIND) & - (TSEC_MIIMIND_NOTVALID | TSEC_MIIMIND_BUSY)) - DELAY(TSEC_READ_DELAY); - - if (timeout == 0) - device_printf(dev, "Timeout while reading from PHY!\n"); - + timeout = 
tsec_mii_wait(sc, TSEC_MIIMIND_NOTVALID | TSEC_MIIMIND_BUSY); rv = TSEC_PHY_READ(sc, TSEC_REG_MIIMSTAT); TSEC_PHY_UNLOCK(); + if (timeout) + device_printf(dev, "Timeout while reading from PHY!\n"); + return (rv); } @@ -1630,21 +1631,17 @@ int tsec_miibus_writereg(device_t dev, int phy, int reg, int value) { struct tsec_softc *sc; - uint32_t timeout; + int timeout; sc = device_get_softc(dev); TSEC_PHY_LOCK(); TSEC_PHY_WRITE(sc, TSEC_REG_MIIMADD, (phy << 8) | reg); TSEC_PHY_WRITE(sc, TSEC_REG_MIIMCON, value); - - timeout = TSEC_READ_RETRY; - while (--timeout && (TSEC_READ(sc, TSEC_REG_MIIMIND) & - TSEC_MIIMIND_BUSY)) - DELAY(TSEC_READ_DELAY); + timeout = tsec_mii_wait(sc, TSEC_MIIMIND_BUSY); TSEC_PHY_UNLOCK(); - if (timeout == 0) + if (timeout) device_printf(dev, "Timeout while writing to PHY!\n"); return (0); diff --git a/freebsd/sys/dev/tsec/if_tsec.h b/freebsd/sys/dev/tsec/if_tsec.h index d13f4639..c8dca3bf 100644 --- a/freebsd/sys/dev/tsec/if_tsec.h +++ b/freebsd/sys/dev/tsec/if_tsec.h @@ -32,6 +32,7 @@ #define TSEC_RX_NUM_DESC 256 #define TSEC_TX_NUM_DESC 256 +#define TSEC_TX_MAX_DMA_SEGS 8 /* Interrupt Coalescing types */ #define TSEC_IC_RX 0 @@ -44,6 +45,12 @@ #define TSEC_MIN_FRAME_SIZE 64 #define TSEC_MAX_FRAME_SIZE 9600 +struct tsec_bufmap { + bus_dmamap_t map; + int map_initialized; + struct mbuf *mbuf; +}; + struct tsec_softc { /* XXX MII bus requires that struct ifnet is first!!! 
*/ struct ifnet *tsec_ifp; @@ -59,16 +66,16 @@ struct tsec_softc { bus_dma_tag_t tsec_tx_dtag; /* TX descriptors tag */ bus_dmamap_t tsec_tx_dmap; /* TX descriptors map */ - struct tsec_desc *tsec_tx_vaddr;/* vadress of TX descriptors */ - uint32_t tsec_tx_raddr; /* real address of TX descriptors */ + bus_dma_tag_t tsec_tx_mtag; /* TX mbufs tag */ + uint32_t tx_idx_head; /* TX head descriptor/bufmap index */ + uint32_t tx_idx_tail; /* TX tail descriptor/bufmap index */ + struct tsec_desc *tsec_tx_vaddr;/* virtual address of TX descriptors */ + struct tsec_bufmap tx_bufmap[TSEC_TX_NUM_DESC]; + bus_dma_tag_t tsec_rx_mtag; /* TX mbufs tag */ bus_dma_tag_t tsec_rx_dtag; /* RX descriptors tag */ bus_dmamap_t tsec_rx_dmap; /* RX descriptors map */ struct tsec_desc *tsec_rx_vaddr; /* vadress of RX descriptors */ - uint32_t tsec_rx_raddr; /* real address of RX descriptors */ - - bus_dma_tag_t tsec_tx_mtag; /* TX mbufs tag */ - bus_dma_tag_t tsec_rx_mtag; /* TX mbufs tag */ struct rx_data_type { bus_dmamap_t map; /* mbuf map */ @@ -76,8 +83,6 @@ struct tsec_softc { uint32_t paddr; /* DMA address of buffer */ } rx_data[TSEC_RX_NUM_DESC]; - uint32_t tx_cur_desc_cnt; - uint32_t tx_dirty_desc_cnt; uint32_t rx_cur_desc_cnt; struct resource *sc_rres; /* register resource */ @@ -104,24 +109,6 @@ struct tsec_softc { struct callout tsec_callout; int tsec_watchdog; - /* TX maps */ - bus_dmamap_t tx_map_data[TSEC_TX_NUM_DESC]; - - /* unused TX maps data */ - uint32_t tx_map_unused_get_cnt; - uint32_t tx_map_unused_put_cnt; - bus_dmamap_t *tx_map_unused_data[TSEC_TX_NUM_DESC]; - - /* used TX maps data */ - uint32_t tx_map_used_get_cnt; - uint32_t tx_map_used_put_cnt; - bus_dmamap_t *tx_map_used_data[TSEC_TX_NUM_DESC]; - - /* mbufs in TX queue */ - uint32_t tx_mbuf_used_get_cnt; - uint32_t tx_mbuf_used_put_cnt; - struct mbuf *tx_mbuf_used_data[TSEC_TX_NUM_DESC]; - /* interrupt coalescing */ struct mtx ic_lock; uint32_t rx_ic_time; /* RW, valid values 0..65535 */ @@ -136,6 +123,9 @@ 
struct tsec_softc { bus_space_tag_t phy_bst; bus_space_handle_t phy_bsh; int phy_regoff; + + uint32_t tsec_rx_raddr; /* real address of RX descriptors */ + uint32_t tsec_tx_raddr; /* real address of TX descriptors */ }; /* interface to get/put generic objects */ @@ -156,75 +146,8 @@ struct tsec_softc { (sc)->count = (wrap) - 1; \ } while (0) -/* TX maps interface */ -#define TSEC_TX_MAP_CNT_INIT(sc) do { \ - TSEC_CNT_INIT((sc)->tx_map_unused_get_cnt, TSEC_TX_NUM_DESC); \ - TSEC_CNT_INIT((sc)->tx_map_unused_put_cnt, TSEC_TX_NUM_DESC); \ - TSEC_CNT_INIT((sc)->tx_map_used_get_cnt, TSEC_TX_NUM_DESC); \ - TSEC_CNT_INIT((sc)->tx_map_used_put_cnt, TSEC_TX_NUM_DESC); \ -} while (0) - -/* interface to get/put unused TX maps */ -#define TSEC_ALLOC_TX_MAP(sc) \ - TSEC_GET_GENERIC(sc, tx_map_unused_data, tx_map_unused_get_cnt, \ - TSEC_TX_NUM_DESC) - -#define TSEC_FREE_TX_MAP(sc, val) \ - TSEC_PUT_GENERIC(sc, tx_map_unused_data, tx_map_unused_put_cnt, \ - TSEC_TX_NUM_DESC, val) - -/* interface to get/put used TX maps */ -#define TSEC_GET_TX_MAP(sc) \ - TSEC_GET_GENERIC(sc, tx_map_used_data, tx_map_used_get_cnt, \ - TSEC_TX_NUM_DESC) - -#define TSEC_PUT_TX_MAP(sc, val) \ - TSEC_PUT_GENERIC(sc, tx_map_used_data, tx_map_used_put_cnt, \ - TSEC_TX_NUM_DESC, val) - -/* interface to get/put TX mbufs in send queue */ -#define TSEC_TX_MBUF_CNT_INIT(sc) do { \ - TSEC_CNT_INIT((sc)->tx_mbuf_used_get_cnt, TSEC_TX_NUM_DESC); \ - TSEC_CNT_INIT((sc)->tx_mbuf_used_put_cnt, TSEC_TX_NUM_DESC); \ -} while (0) - -#define TSEC_GET_TX_MBUF(sc) \ - TSEC_GET_GENERIC(sc, tx_mbuf_used_data, tx_mbuf_used_get_cnt, \ - TSEC_TX_NUM_DESC) - -#define TSEC_PUT_TX_MBUF(sc, val) \ - TSEC_PUT_GENERIC(sc, tx_mbuf_used_data, tx_mbuf_used_put_cnt, \ - TSEC_TX_NUM_DESC, val) - -#define TSEC_EMPTYQ_TX_MBUF(sc) \ - ((sc)->tx_mbuf_used_get_cnt == (sc)->tx_mbuf_used_put_cnt) - -/* interface for manage tx tsec_desc */ -#define TSEC_TX_DESC_CNT_INIT(sc) do { \ - TSEC_CNT_INIT((sc)->tx_cur_desc_cnt, TSEC_TX_NUM_DESC); \ - 
TSEC_CNT_INIT((sc)->tx_dirty_desc_cnt, TSEC_TX_NUM_DESC); \ -} while (0) - -#define TSEC_GET_CUR_TX_DESC(sc) \ - &TSEC_GET_GENERIC(sc, tsec_tx_vaddr, tx_cur_desc_cnt, \ - TSEC_TX_NUM_DESC) - -#define TSEC_GET_DIRTY_TX_DESC(sc) \ - &TSEC_GET_GENERIC(sc, tsec_tx_vaddr, tx_dirty_desc_cnt, \ - TSEC_TX_NUM_DESC) - -#define TSEC_BACK_DIRTY_TX_DESC(sc) \ - TSEC_BACK_GENERIC(sc, tx_dirty_desc_cnt, TSEC_TX_NUM_DESC) - -#define TSEC_CUR_DIFF_DIRTY_TX_DESC(sc) \ - ((sc)->tx_cur_desc_cnt != (sc)->tx_dirty_desc_cnt) - -#define TSEC_FREE_TX_DESC(sc) \ - (((sc)->tx_cur_desc_cnt < (sc)->tx_dirty_desc_cnt) ? \ - ((sc)->tx_dirty_desc_cnt - (sc)->tx_cur_desc_cnt - 1) \ - : \ - (TSEC_TX_NUM_DESC - (sc)->tx_cur_desc_cnt \ - + (sc)->tx_dirty_desc_cnt - 1)) +#define TSEC_FREE_TX_DESC(sc) \ + (((sc)->tx_idx_tail - (sc)->tx_idx_head - 1) & (TSEC_TX_NUM_DESC - 1)) /* interface for manage rx tsec_desc */ #define TSEC_RX_DESC_CNT_INIT(sc) do { \ @@ -243,9 +166,8 @@ struct tsec_softc { /* init all counters (for init only!) 
*/ #define TSEC_TX_RX_COUNTERS_INIT(sc) do { \ - TSEC_TX_MAP_CNT_INIT(sc); \ - TSEC_TX_MBUF_CNT_INIT(sc); \ - TSEC_TX_DESC_CNT_INIT(sc); \ + sc->tx_idx_head = 0; \ + sc->tx_idx_tail = 0; \ TSEC_RX_DESC_CNT_INIT(sc); \ } while (0) diff --git a/freebsd/sys/dev/usb/quirk/usb_quirk.c b/freebsd/sys/dev/usb/quirk/usb_quirk.c index d73116e2..794bb0b0 100644 --- a/freebsd/sys/dev/usb/quirk/usb_quirk.c +++ b/freebsd/sys/dev/usb/quirk/usb_quirk.c @@ -521,6 +521,7 @@ static struct usb_quirk_entry usb_quirks[USB_DEV_QUIRKS_MAX] = { /* Non-standard USB AUDIO devices */ USB_QUIRK(MAUDIO, FASTTRACKULTRA, 0x0000, 0xffff, UQ_AU_VENDOR_CLASS), USB_QUIRK(MAUDIO, FASTTRACKULTRA8R, 0x0000, 0xffff, UQ_AU_VENDOR_CLASS), + USB_QUIRK(CMEDIA, CM6206, 0x0000, 0xffff, UQ_AU_SET_SPDIF_CM6206), /* * Quirks for manufacturers which USB devices does not respond @@ -607,6 +608,7 @@ static const char *usb_quirk_str[USB_QUIRK_MAX] = { [UQ_AU_VENDOR_CLASS] = "UQ_AU_VENDOR_CLASS", [UQ_SINGLE_CMD_MIDI] = "UQ_SINGLE_CMD_MIDI", [UQ_MSC_DYMO_EJECT] = "UQ_MSC_DYMO_EJECT", + [UQ_AU_SET_SPDIF_CM6206] = "UQ_AU_SET_SPDIF_CM6206", }; /*------------------------------------------------------------------------* diff --git a/freebsd/sys/dev/usb/quirk/usb_quirk.h b/freebsd/sys/dev/usb/quirk/usb_quirk.h index 7010916c..f7e490ce 100644 --- a/freebsd/sys/dev/usb/quirk/usb_quirk.h +++ b/freebsd/sys/dev/usb/quirk/usb_quirk.h @@ -109,6 +109,7 @@ enum { UQ_AU_VENDOR_CLASS, /* audio device uses vendor and not audio class */ UQ_SINGLE_CMD_MIDI, /* at most one command per USB packet */ UQ_MSC_DYMO_EJECT, /* ejects Dymo MSC device */ + UQ_AU_SET_SPDIF_CM6206, /* enable S/PDIF audio output */ USB_QUIRK_MAX }; diff --git a/freebsd/sys/dev/usb/usb_hub.c b/freebsd/sys/dev/usb/usb_hub.c index b7d5b597..b3ee8ab7 100644 --- a/freebsd/sys/dev/usb/usb_hub.c +++ b/freebsd/sys/dev/usb/usb_hub.c @@ -2300,7 +2300,7 @@ usb_needs_explore(struct usb_bus *bus, uint8_t do_probe) * usb_needs_explore_all * * This function is called whenever a new 
driver is loaded and will - * cause that all USB busses are re-explored. + * cause that all USB buses are re-explored. *------------------------------------------------------------------------*/ void usb_needs_explore_all(void) @@ -2318,7 +2318,7 @@ usb_needs_explore_all(void) return; } /* - * Explore all USB busses in parallel. + * Explore all USB buses in parallel. */ max = devclass_get_maxunit(dc); while (max >= 0) { diff --git a/freebsd/sys/dev/usb/usb_pf.h b/freebsd/sys/dev/usb/usb_pf.h index 9d51e98c..29fe6ebd 100644 --- a/freebsd/sys/dev/usb/usb_pf.h +++ b/freebsd/sys/dev/usb/usb_pf.h @@ -15,7 +15,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* diff --git a/freebsd/sys/dev/usb/wlan/if_rsu.c b/freebsd/sys/dev/usb/wlan/if_rsu.c index 201e75f3..21d67465 100644 --- a/freebsd/sys/dev/usb/wlan/if_rsu.c +++ b/freebsd/sys/dev/usb/wlan/if_rsu.c @@ -528,6 +528,12 @@ rsu_attach(device_t self) sc->sc_ntxstream = 2; rft = "2T2R"; break; + case 0x3: /* "green" NIC */ + sc->sc_rftype = RTL8712_RFCONFIG_1T2R; + sc->sc_nrxstream = 2; + sc->sc_ntxstream = 1; + rft = "1T2R ('green')"; + break; default: device_printf(sc->sc_dev, "%s: unknown board type (rfconfig=0x%02x)\n", diff --git a/freebsd/sys/dev/usb/wlan/if_rum.c b/freebsd/sys/dev/usb/wlan/if_rum.c index 19155ec2..897f9c00 100644 --- a/freebsd/sys/dev/usb/wlan/if_rum.c +++ b/freebsd/sys/dev/usb/wlan/if_rum.c @@ -1505,11 +1505,10 @@ rum_tx_crypto_flags(struct rum_softc *sc, struct ieee80211_node *ni, static int rum_tx_mgt(struct rum_softc *sc, struct mbuf *m0, struct ieee80211_node *ni) { - struct ieee80211vap *vap = ni->ni_vap; + const struct ieee80211_txparam *tp = ni->ni_txparms; struct ieee80211com *ic = &sc->sc_ic; struct rum_tx_data *data; struct ieee80211_frame *wh; - const struct ieee80211_txparam *tp; struct ieee80211_key *k = NULL; uint32_t flags = 0; uint16_t dur; @@ -1539,8 +1538,6 @@ rum_tx_mgt(struct rum_softc *sc, struct mbuf *m0, struct ieee80211_node *ni) wh = mtod(m0, struct ieee80211_frame *); } - tp = &vap->iv_txparms[ieee80211_chan2mode(ic->ic_curchan)]; - if (!IEEE80211_IS_MULTICAST(wh->i_addr1)) { flags |= RT2573_TX_NEED_ACK; @@ -1644,7 +1641,7 @@ rum_tx_data(struct rum_softc *sc, struct mbuf *m0, struct ieee80211_node *ni) struct ieee80211com *ic = &sc->sc_ic; struct rum_tx_data *data; struct ieee80211_frame *wh; - const struct ieee80211_txparam *tp; + const struct ieee80211_txparam *tp = ni->ni_txparms; struct ieee80211_key *k = NULL; uint32_t flags = 0; uint16_t dur; @@ -1663,13 +1660,12 @@ rum_tx_data(struct rum_softc *sc, struct mbuf *m0, struct ieee80211_node *ni) qos = 0; ac = M_WME_GETAC(m0); - tp = 
&vap->iv_txparms[ieee80211_chan2mode(ni->ni_chan)]; - if (IEEE80211_IS_MULTICAST(wh->i_addr1)) + if (m0->m_flags & M_EAPOL) + rate = tp->mgmtrate; + else if (IEEE80211_IS_MULTICAST(wh->i_addr1)) rate = tp->mcastrate; else if (tp->ucastrate != IEEE80211_FIXED_RATE_NONE) rate = tp->ucastrate; - else if (m0->m_flags & M_EAPOL) - rate = tp->mgmtrate; else { (void) ieee80211_ratectl_rate(ni, NULL, 0); rate = ni->ni_txrate; @@ -2192,12 +2188,11 @@ rum_set_chan(struct rum_softc *sc, struct ieee80211_channel *c) static void rum_set_maxretry(struct rum_softc *sc, struct ieee80211vap *vap) { - const struct ieee80211_txparam *tp; struct ieee80211_node *ni = vap->iv_bss; + const struct ieee80211_txparam *tp = ni->ni_txparms; struct rum_vap *rvp = RUM_VAP(vap); - tp = &vap->iv_txparms[ieee80211_chan2mode(ni->ni_chan)]; - rvp->maxretry = tp->maxretry < 0xf ? tp->maxretry : 0xf; + rvp->maxretry = MIN(tp->maxretry, 0xf); rum_modbits(sc, RT2573_TXRX_CSR4, RT2573_SHORT_RETRY(rvp->maxretry) | RT2573_LONG_RETRY(rvp->maxretry), diff --git a/freebsd/sys/dev/usb/wlan/if_run.c b/freebsd/sys/dev/usb/wlan/if_run.c index 9983fce2..5af8e859 100644 --- a/freebsd/sys/dev/usb/wlan/if_run.c +++ b/freebsd/sys/dev/usb/wlan/if_run.c @@ -3312,8 +3312,7 @@ run_tx(struct run_softc *sc, struct mbuf *m, struct ieee80211_node *ni) struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = ni->ni_vap; struct ieee80211_frame *wh; - struct ieee80211_channel *chan; - const struct ieee80211_txparam *tp; + const struct ieee80211_txparam *tp = ni->ni_txparms; struct run_node *rn = RUN_NODE(ni); struct run_tx_data *data; struct rt2870_txd *txd; @@ -3362,9 +3361,6 @@ run_tx(struct run_softc *sc, struct mbuf *m, struct ieee80211_node *ni) RUN_DPRINTF(sc, RUN_DEBUG_XMIT, "qos %d\tqid %d\ttid %d\tqflags %x\n", qos, qid, tid, qflags); - chan = (ni->ni_chan != IEEE80211_CHAN_ANYC)?ni->ni_chan:ic->ic_curchan; - tp = &vap->iv_txparms[ieee80211_chan2mode(chan)]; - /* pickup a rate index */ if 
(IEEE80211_IS_MULTICAST(wh->i_addr1) || type != IEEE80211_FC0_TYPE_DATA || m->m_flags & M_EAPOL) { diff --git a/freebsd/sys/dev/usb/wlan/if_ural.c b/freebsd/sys/dev/usb/wlan/if_ural.c index d6a199ff..0e380ec0 100644 --- a/freebsd/sys/dev/usb/wlan/if_ural.c +++ b/freebsd/sys/dev/usb/wlan/if_ural.c @@ -1072,9 +1072,8 @@ ural_tx_bcn(struct ural_softc *sc, struct mbuf *m0, struct ieee80211_node *ni) static int ural_tx_mgt(struct ural_softc *sc, struct mbuf *m0, struct ieee80211_node *ni) { - struct ieee80211vap *vap = ni->ni_vap; + const struct ieee80211_txparam *tp = ni->ni_txparms; struct ieee80211com *ic = ni->ni_ic; - const struct ieee80211_txparam *tp; struct ural_tx_data *data; struct ieee80211_frame *wh; struct ieee80211_key *k; @@ -1087,8 +1086,6 @@ ural_tx_mgt(struct ural_softc *sc, struct mbuf *m0, struct ieee80211_node *ni) STAILQ_REMOVE_HEAD(&sc->tx_free, next); sc->tx_nfree--; - tp = &vap->iv_txparms[ieee80211_chan2mode(ic->ic_curchan)]; - wh = mtod(m0, struct ieee80211_frame *); if (wh->i_fc[1] & IEEE80211_FC1_PROTECTED) { k = ieee80211_crypto_encap(ni, m0); @@ -1241,7 +1238,7 @@ ural_tx_data(struct ural_softc *sc, struct mbuf *m0, struct ieee80211_node *ni) struct ieee80211com *ic = ni->ni_ic; struct ural_tx_data *data; struct ieee80211_frame *wh; - const struct ieee80211_txparam *tp; + const struct ieee80211_txparam *tp = ni->ni_txparms; struct ieee80211_key *k; uint32_t flags = 0; uint16_t dur; @@ -1251,8 +1248,9 @@ ural_tx_data(struct ural_softc *sc, struct mbuf *m0, struct ieee80211_node *ni) wh = mtod(m0, struct ieee80211_frame *); - tp = &vap->iv_txparms[ieee80211_chan2mode(ni->ni_chan)]; - if (IEEE80211_IS_MULTICAST(wh->i_addr1)) + if (m0->m_flags & M_EAPOL) + rate = tp->mgmtrate; + else if (IEEE80211_IS_MULTICAST(wh->i_addr1)) rate = tp->mcastrate; else if (tp->ucastrate != IEEE80211_FIXED_RATE_NONE) rate = tp->ucastrate; diff --git a/freebsd/sys/dev/usb/wlan/if_urtw.c b/freebsd/sys/dev/usb/wlan/if_urtw.c index 501cc50c..74fcee28 100644 --- 
a/freebsd/sys/dev/usb/wlan/if_urtw.c +++ b/freebsd/sys/dev/usb/wlan/if_urtw.c @@ -1645,7 +1645,7 @@ urtw_tx_start(struct urtw_softc *sc, struct ieee80211_node *ni, struct mbuf *m0, { struct ieee80211_frame *wh = mtod(m0, struct ieee80211_frame *); struct ieee80211_key *k; - const struct ieee80211_txparam *tp; + const struct ieee80211_txparam *tp = ni->ni_txparms; struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = ni->ni_vap; struct usb_xfer *rtl8187b_pipes[URTW_8187B_TXPIPE_MAX] = { @@ -1692,11 +1692,10 @@ urtw_tx_start(struct urtw_softc *sc, struct ieee80211_node *ni, struct mbuf *m0, } if ((wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK) == IEEE80211_FC0_TYPE_MGT || - (wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK) == IEEE80211_FC0_TYPE_CTL) { - tp = &vap->iv_txparms[ieee80211_chan2mode(ic->ic_curchan)]; + (wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK) == IEEE80211_FC0_TYPE_CTL || + (m0->m_flags & M_EAPOL) != 0) { rate = tp->mgmtrate; } else { - tp = &vap->iv_txparms[ieee80211_chan2mode(ni->ni_chan)]; /* for data frames */ if (IEEE80211_IS_MULTICAST(wh->i_addr1)) rate = tp->mcastrate; diff --git a/freebsd/sys/dev/usb/wlan/if_zyd.c b/freebsd/sys/dev/usb/wlan/if_zyd.c index f935bfc9..1208d185 100644 --- a/freebsd/sys/dev/usb/wlan/if_zyd.c +++ b/freebsd/sys/dev/usb/wlan/if_zyd.c @@ -2441,7 +2441,7 @@ zyd_tx_start(struct zyd_softc *sc, struct mbuf *m0, struct ieee80211_node *ni) struct zyd_tx_desc *desc; struct zyd_tx_data *data; struct ieee80211_frame *wh; - const struct ieee80211_txparam *tp; + const struct ieee80211_txparam *tp = ni->ni_txparms; struct ieee80211_key *k; int rate, totlen; static const uint8_t ratediv[] = ZYD_TX_RATEDIV; @@ -2455,11 +2455,10 @@ zyd_tx_start(struct zyd_softc *sc, struct mbuf *m0, struct ieee80211_node *ni) sc->tx_nfree--; if ((wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK) == IEEE80211_FC0_TYPE_MGT || - (wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK) == IEEE80211_FC0_TYPE_CTL) { - tp = &vap->iv_txparms[ieee80211_chan2mode(ic->ic_curchan)]; + (wh->i_fc[0] & 
IEEE80211_FC0_TYPE_MASK) == IEEE80211_FC0_TYPE_CTL || + (m0->m_flags & M_EAPOL) != 0) { rate = tp->mgmtrate; } else { - tp = &vap->iv_txparms[ieee80211_chan2mode(ni->ni_chan)]; /* for data frames */ if (IEEE80211_IS_MULTICAST(wh->i_addr1)) rate = tp->mcastrate; @@ -2584,10 +2583,10 @@ zyd_start(struct zyd_softc *sc) while (sc->tx_nfree > 0 && (m = mbufq_dequeue(&sc->sc_snd)) != NULL) { ni = (struct ieee80211_node *)m->m_pkthdr.rcvif; if (zyd_tx_start(sc, m, ni) != 0) { - ieee80211_free_node(ni); m_freem(m); if_inc_counter(ni->ni_vap->iv_ifp, IFCOUNTER_OERRORS, 1); + ieee80211_free_node(ni); break; } } diff --git a/freebsd/sys/i386/i386/in_cksum.c b/freebsd/sys/i386/i386/in_cksum.c index beb5ae88..d16f6c21 100644 --- a/freebsd/sys/i386/i386/in_cksum.c +++ b/freebsd/sys/i386/i386/in_cksum.c @@ -12,7 +12,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/i386/include/machine/cpufunc.h b/freebsd/sys/i386/include/machine/cpufunc.h index df283e73..923d28ea 100644 --- a/freebsd/sys/i386/include/machine/cpufunc.h +++ b/freebsd/sys/i386/include/machine/cpufunc.h @@ -10,7 +10,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * @@ -160,6 +160,13 @@ mfence(void) __asm __volatile("mfence" : : : "memory"); } +static __inline void +sfence(void) +{ + + __asm __volatile("sfence" : : : "memory"); +} + #ifdef _KERNEL #ifndef __rtems__ @@ -630,34 +637,6 @@ load_dr3(u_int dr3) __asm __volatile("movl %0,%%dr3" : : "r" (dr3)); } -static __inline u_int -rdr4(void) -{ - u_int data; - __asm __volatile("movl %%dr4,%0" : "=r" (data)); - return (data); -} - -static __inline void -load_dr4(u_int dr4) -{ - __asm __volatile("movl %0,%%dr4" : : "r" (dr4)); -} - -static __inline u_int -rdr5(void) -{ - u_int data; - __asm __volatile("movl %%dr5,%0" : "=r" (data)); - return (data); -} - -static __inline void -load_dr5(u_int dr5) -{ - __asm __volatile("movl %0,%%dr5" : : "r" (dr5)); -} - static __inline u_int rdr6(void) { @@ -753,8 +732,6 @@ void load_dr0(u_int dr0); void load_dr1(u_int dr1); void load_dr2(u_int dr2); void load_dr3(u_int dr3); -void load_dr4(u_int dr4); -void load_dr5(u_int dr5); void load_dr6(u_int dr6); void load_dr7(u_int dr7); void load_fs(u_short sel); @@ -776,8 +753,6 @@ u_int rdr0(void); u_int rdr1(void); u_int rdr2(void); u_int rdr3(void); -u_int rdr4(void); -u_int rdr5(void); u_int rdr6(void); u_int rdr7(void); uint64_t rdtsc(void); diff --git a/freebsd/sys/isa/isavar.h b/freebsd/sys/isa/isavar.h index d2053da3..740a114c 100644 --- a/freebsd/sys/isa/isavar.h +++ b/freebsd/sys/isa/isavar.h @@ -188,19 +188,6 @@ void isa_hint_device_unit(device_t bus, device_t child, const char *name, int *unitp); int isab_attach(device_t dev); -#ifdef PC98 -#include - -/* - * Allocate discontinuous resources for ISA bus. 
- */ -struct resource * -isa_alloc_resourcev(device_t child, int type, int *rid, - bus_addr_t *res, bus_size_t count, u_int flags); -int -isa_load_resourcev(struct resource *re, bus_addr_t *res, bus_size_t count); -#endif - #endif /* _KERNEL */ #endif /* !_ISA_ISAVAR_H_ */ diff --git a/freebsd/sys/kern/init_main.c b/freebsd/sys/kern/init_main.c index 72bab872..910c1820 100644 --- a/freebsd/sys/kern/init_main.c +++ b/freebsd/sys/kern/init_main.c @@ -102,7 +102,7 @@ void mi_startup(void); /* Should be elsewhere */ static struct session session0; static struct pgrp pgrp0; struct proc proc0; -struct thread0_storage thread0_st __aligned(16); +struct thread0_storage thread0_st __aligned(32); struct vmspace vmspace0; struct proc *initproc; diff --git a/freebsd/sys/kern/kern_condvar.c b/freebsd/sys/kern/kern_condvar.c index 6358c376..2843e273 100644 --- a/freebsd/sys/kern/kern_condvar.c +++ b/freebsd/sys/kern/kern_condvar.c @@ -124,7 +124,7 @@ _cv_wait(struct cv *cvp, struct lock_object *lock) "Waiting on \"%s\"", cvp->cv_description); class = LOCK_CLASS(lock); - if (SCHEDULER_STOPPED()) + if (SCHEDULER_STOPPED_TD(td)) return; sleepq_lock(cvp); @@ -178,7 +178,7 @@ _cv_wait_unlock(struct cv *cvp, struct lock_object *lock) ("cv_wait_unlock cannot be used with Giant")); class = LOCK_CLASS(lock); - if (SCHEDULER_STOPPED()) { + if (SCHEDULER_STOPPED_TD(td)) { class->lc_unlock(lock); return; } @@ -230,7 +230,7 @@ _cv_wait_sig(struct cv *cvp, struct lock_object *lock) "Waiting on \"%s\"", cvp->cv_description); class = LOCK_CLASS(lock); - if (SCHEDULER_STOPPED()) + if (SCHEDULER_STOPPED_TD(td)) return (0); sleepq_lock(cvp); @@ -295,7 +295,7 @@ _cv_timedwait_sbt(struct cv *cvp, struct lock_object *lock, sbintime_t sbt, "Waiting on \"%s\"", cvp->cv_description); class = LOCK_CLASS(lock); - if (SCHEDULER_STOPPED()) + if (SCHEDULER_STOPPED_TD(td)) return (0); sleepq_lock(cvp); @@ -358,7 +358,7 @@ _cv_timedwait_sig_sbt(struct cv *cvp, struct lock_object *lock, "Waiting on \"%s\"", 
cvp->cv_description); class = LOCK_CLASS(lock); - if (SCHEDULER_STOPPED()) + if (SCHEDULER_STOPPED_TD(td)) return (0); sleepq_lock(cvp); diff --git a/freebsd/sys/kern/kern_event.c b/freebsd/sys/kern/kern_event.c index 473414ae..a01e9b4b 100644 --- a/freebsd/sys/kern/kern_event.c +++ b/freebsd/sys/kern/kern_event.c @@ -372,6 +372,7 @@ static struct { { &null_filtops }, /* EVFILT_LIO */ { &user_filtops, 1 }, /* EVFILT_USER */ { &null_filtops }, /* EVFILT_SENDFILE */ + { &file_filtops, 1 }, /* EVFILT_EMPTY */ }; /* @@ -965,6 +966,17 @@ kqueue(void) } #endif /* __rtems__ */ +#ifdef KTRACE +static size_t +kev_iovlen(int n, u_int kgio) +{ + + if (n < 0 || n >= kgio / sizeof(struct kevent)) + return (kgio); + return (n * sizeof(struct kevent)); +} +#endif + #ifndef _SYS_SYSPROTO_H_ struct kevent_args { int fd; @@ -988,15 +1000,18 @@ int sys_kevent(struct thread *td, struct kevent_args *uap) { struct timespec ts, *tsp; - struct kevent_copyops k_ops = { uap, - kevent_copyout, - kevent_copyin}; + struct kevent_copyops k_ops = { + .arg = uap, + .k_copyout = kevent_copyout, + .k_copyin = kevent_copyin, + }; int error; #ifdef KTRACE struct uio ktruio; struct iovec ktriov; struct uio *ktruioin = NULL; struct uio *ktruioout = NULL; + u_int kgio; #endif if (uap->timeout != NULL) { @@ -1009,13 +1024,15 @@ sys_kevent(struct thread *td, struct kevent_args *uap) #ifdef KTRACE if (KTRPOINT(td, KTR_GENIO)) { + kgio = ktr_geniosize; ktriov.iov_base = uap->changelist; - ktriov.iov_len = uap->nchanges * sizeof(struct kevent); + ktriov.iov_len = kev_iovlen(uap->nchanges, kgio); ktruio = (struct uio){ .uio_iov = &ktriov, .uio_iovcnt = 1, .uio_segflg = UIO_USERSPACE, .uio_rw = UIO_READ, .uio_td = td }; ktruioin = cloneuio(&ktruio); ktriov.iov_base = uap->eventlist; + ktriov.iov_len = kev_iovlen(uap->nevents, kgio); ktriov.iov_len = uap->nevents * sizeof(struct kevent); ktruioout = cloneuio(&ktruio); } @@ -1026,9 +1043,9 @@ sys_kevent(struct thread *td, struct kevent_args *uap) #ifdef KTRACE 
if (ktruioin != NULL) { - ktruioin->uio_resid = uap->nchanges * sizeof(struct kevent); + ktruioin->uio_resid = kev_iovlen(uap->nchanges, kgio); ktrgenio(uap->fd, UIO_WRITE, ktruioin, 0); - ktruioout->uio_resid = td->td_retval[0] * sizeof(struct kevent); + ktruioout->uio_resid = kev_iovlen(td->td_retval[0], kgio); ktrgenio(uap->fd, UIO_READ, ktruioout, error); } #endif diff --git a/freebsd/sys/kern/kern_linker.c b/freebsd/sys/kern/kern_linker.c index 82a33023..5a69490d 100644 --- a/freebsd/sys/kern/kern_linker.c +++ b/freebsd/sys/kern/kern_linker.c @@ -475,7 +475,8 @@ linker_load_file(const char *filename, linker_file_t *result) * printout a message before to fail. */ if (error == ENOSYS) - printf("linker_load_file: Unsupported file type\n"); + printf("%s: %s - unsupported file type\n", + __func__, filename); /* * Format not recognized or otherwise unloadable. diff --git a/freebsd/sys/kern/kern_mib.c b/freebsd/sys/kern/kern_mib.c index 63d8b44f..b3ab9e9f 100644 --- a/freebsd/sys/kern/kern_mib.c +++ b/freebsd/sys/kern/kern_mib.c @@ -97,6 +97,11 @@ SYSCTL_ROOT_NODE(OID_AUTO, regression, CTLFLAG_RW, 0, "Regression test MIB"); #endif +#ifdef EXT_RESOURCES +SYSCTL_ROOT_NODE(OID_AUTO, clock, CTLFLAG_RW, 0, + "Clocks"); +#endif + SYSCTL_STRING(_kern, OID_AUTO, ident, CTLFLAG_RD|CTLFLAG_MPSAFE, kern_ident, 0, "Kernel identifier"); diff --git a/freebsd/sys/kern/kern_synch.c b/freebsd/sys/kern/kern_synch.c index 7109e798..91b3c72b 100644 --- a/freebsd/sys/kern/kern_synch.c +++ b/freebsd/sys/kern/kern_synch.c @@ -68,13 +68,6 @@ __FBSDID("$FreeBSD$"); #include -#define KTDSTATE(td) \ - (((td)->td_inhibitors & TDI_SLEEPING) != 0 ? "sleep" : \ - ((td)->td_inhibitors & TDI_SUSPENDED) != 0 ? "suspended" : \ - ((td)->td_inhibitors & TDI_SWAPPED) != 0 ? "swapped" : \ - ((td)->td_inhibitors & TDI_LOCK) != 0 ? "blocked" : \ - ((td)->td_inhibitors & TDI_IWAIT) != 0 ? 
"iwait" : "yielding") - #ifndef __rtems__ static void synch_setup(void *dummy); SYSINIT(synch_setup, SI_SUB_KICK_SCHEDULER, SI_ORDER_FIRST, synch_setup, @@ -164,11 +157,9 @@ _sleep(void *ident, struct lock_object *lock, int priority, "Sleeping on \"%s\"", wmesg); KASSERT(sbt != 0 || mtx_owned(&Giant) || lock != NULL, ("sleeping without a lock")); + KASSERT(ident != NULL, ("_sleep: NULL ident")); #ifndef __rtems__ - KASSERT(p != NULL, ("msleep1")); -#endif /* __rtems__ */ - KASSERT(ident != NULL && TD_IS_RUNNING(td), ("msleep")); -#ifndef __rtems__ + KASSERT(TD_IS_RUNNING(td), ("_sleep: curthread not running")); if (priority & PDROP) KASSERT(lock != NULL && lock != &Giant.lock_object, ("PDROP requires a non-Giant lock")); @@ -179,7 +170,7 @@ _sleep(void *ident, struct lock_object *lock, int priority, class = NULL; #ifndef __rtems__ - if (SCHEDULER_STOPPED()) { + if (SCHEDULER_STOPPED_TD(td)) { if (lock != NULL && priority & PDROP) class->lc_unlock(lock); return (0); @@ -277,10 +268,10 @@ msleep_spin_sbt(void *ident, struct mtx *mtx, const char *wmesg, td = curthread; p = td->td_proc; KASSERT(mtx != NULL, ("sleeping without a mutex")); - KASSERT(p != NULL, ("msleep1")); - KASSERT(ident != NULL && TD_IS_RUNNING(td), ("msleep")); + KASSERT(ident != NULL, ("msleep_spin_sbt: NULL ident")); + KASSERT(TD_IS_RUNNING(td), ("msleep_spin_sbt: curthread not running")); - if (SCHEDULER_STOPPED()) + if (SCHEDULER_STOPPED_TD(td)) return (0); sleepq_lock(ident); @@ -447,7 +438,7 @@ mi_switch(int flags, struct thread *newtd) */ if (kdb_active) kdb_switch(); - if (SCHEDULER_STOPPED()) + if (SCHEDULER_STOPPED_TD(td)) return; if (flags & SW_VOL) { td->td_ru.ru_nvcsw++; @@ -473,20 +464,12 @@ mi_switch(int flags, struct thread *newtd) PCPU_SET(switchticks, ticks); CTR4(KTR_PROC, "mi_switch: old thread %ld (td_sched %p, pid %ld, %s)", td->td_tid, td_get_sched(td), td->td_proc->p_pid, td->td_name); -#if (KTR_COMPILE & KTR_SCHED) != 0 - if (TD_IS_IDLETHREAD(td)) - KTR_STATE1(KTR_SCHED, 
"thread", sched_tdname(td), "idle", - "prio:%d", td->td_priority); - else - KTR_STATE3(KTR_SCHED, "thread", sched_tdname(td), KTDSTATE(td), - "prio:%d", td->td_priority, "wmesg:\"%s\"", td->td_wmesg, - "lockname:\"%s\"", td->td_lockname); +#ifdef KDTRACE_HOOKS + if ((flags & SW_PREEMPT) != 0 || ((flags & SW_INVOL) != 0 && + (flags & SW_TYPE_MASK) == SWT_NEEDRESCHED)) + SDT_PROBE0(sched, , , preempt); #endif - SDT_PROBE0(sched, , , preempt); sched_switch(td, newtd, flags); - KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "running", - "prio:%d", td->td_priority); - CTR4(KTR_PROC, "mi_switch: new thread %ld (td_sched %p, pid %ld, %s)", td->td_tid, td_get_sched(td), td->td_proc->p_pid, td->td_name); diff --git a/freebsd/sys/kern/kern_sysctl.c b/freebsd/sys/kern/kern_sysctl.c index 9553f4c7..0f003907 100644 --- a/freebsd/sys/kern/kern_sysctl.c +++ b/freebsd/sys/kern/kern_sysctl.c @@ -634,7 +634,8 @@ sysctl_remove_oid_locked(struct sysctl_oid *oidp, int del, int recurse) if (oidp == NULL) return(EINVAL); if ((oidp->oid_kind & CTLFLAG_DYN) == 0) { - printf("can't remove non-dynamic nodes!\n"); + printf("Warning: can't remove non-dynamic nodes (%s)!\n", + oidp->oid_name); return (EINVAL); } /* diff --git a/freebsd/sys/kern/kern_time.c b/freebsd/sys/kern/kern_time.c index 95111932..2fc7092f 100644 --- a/freebsd/sys/kern/kern_time.c +++ b/freebsd/sys/kern/kern_time.c @@ -90,6 +90,9 @@ static uma_zone_t itimer_zone = NULL; static int settime(struct thread *, struct timeval *); #endif /* __rtems__ */ static void timevalfix(struct timeval *); +static int user_clock_nanosleep(struct thread *td, clockid_t clock_id, + int flags, const struct timespec *ua_rqtp, + struct timespec *ua_rmtp); #ifndef __rtems__ static void itimer_start(void); @@ -397,6 +400,11 @@ sys_clock_settime(struct thread *td, struct clock_settime_args *uap) return (kern_clock_settime(td, uap->clock_id, &ats)); } +static int allow_insane_settime = 0; +SYSCTL_INT(_debug, OID_AUTO, allow_insane_settime, 
CTLFLAG_RWTUN, + &allow_insane_settime, 0, + "do not perform possibly restrictive checks on settime(2) args"); + int kern_clock_settime(struct thread *td, clockid_t clock_id, struct timespec *ats) { @@ -410,6 +418,8 @@ kern_clock_settime(struct thread *td, clockid_t clock_id, struct timespec *ats) if (ats->tv_nsec < 0 || ats->tv_nsec >= 1000000000 || ats->tv_sec < 0) return (EINVAL); + if (!allow_insane_settime && ats->tv_sec > 9999ULL * 366 * 24 * 60 * 60) + return (EINVAL); /* XXX Don't convert nsec->usec and back */ TIMESPEC_TO_TIMEVAL(&atv, ats); error = settime(td, &atv); @@ -487,47 +497,95 @@ kern_clock_getres(struct thread *td, clockid_t clock_id, struct timespec *ts) } #endif +int +kern_nanosleep(struct thread *td, struct timespec *rqt, struct timespec *rmt) +{ + + return (kern_clock_nanosleep(td, CLOCK_REALTIME, TIMER_RELTIME, rqt, + rmt)); +} + static uint8_t nanowait[MAXCPU]; int -kern_nanosleep(struct thread *td, struct timespec *rqt, struct timespec *rmt) +kern_clock_nanosleep(struct thread *td, clockid_t clock_id, int flags, + const struct timespec *rqt, struct timespec *rmt) { - struct timespec ts; + struct timespec ts, now; sbintime_t sbt, sbtt, prec, tmp; time_t over; int error; + bool is_abs_real; if (rqt->tv_nsec < 0 || rqt->tv_nsec >= 1000000000) return (EINVAL); - if (rqt->tv_sec < 0 || (rqt->tv_sec == 0 && rqt->tv_nsec == 0)) - return (0); - ts = *rqt; - if (ts.tv_sec > INT32_MAX / 2) { - over = ts.tv_sec - INT32_MAX / 2; - ts.tv_sec -= over; - } else - over = 0; - tmp = tstosbt(ts); - prec = tmp; - prec >>= tc_precexp; - if (TIMESEL(&sbt, tmp)) - sbt += tc_tick_sbt; - sbt += tmp; - error = tsleep_sbt(&nanowait[curcpu], PWAIT | PCATCH, "nanslp", - sbt, prec, C_ABSOLUTE); + if ((flags & ~TIMER_ABSTIME) != 0) + return (EINVAL); + switch (clock_id) { + case CLOCK_REALTIME: + case CLOCK_REALTIME_PRECISE: + case CLOCK_REALTIME_FAST: + case CLOCK_SECOND: + is_abs_real = (flags & TIMER_ABSTIME) != 0; + break; + case CLOCK_MONOTONIC: + case 
CLOCK_MONOTONIC_PRECISE: + case CLOCK_MONOTONIC_FAST: + case CLOCK_UPTIME: + case CLOCK_UPTIME_PRECISE: + case CLOCK_UPTIME_FAST: + is_abs_real = false; + break; + case CLOCK_VIRTUAL: + case CLOCK_PROF: + case CLOCK_PROCESS_CPUTIME_ID: + return (ENOTSUP); + case CLOCK_THREAD_CPUTIME_ID: + default: + return (EINVAL); + } + do { + ts = *rqt; + if ((flags & TIMER_ABSTIME) != 0) { + if (is_abs_real) + td->td_rtcgen = + atomic_load_acq_int(&rtc_generation); + error = kern_clock_gettime(td, clock_id, &now); + KASSERT(error == 0, ("kern_clock_gettime: %d", error)); + timespecsub(&ts, &now); + } + if (ts.tv_sec < 0 || (ts.tv_sec == 0 && ts.tv_nsec == 0)) { + error = EWOULDBLOCK; + break; + } + if (ts.tv_sec > INT32_MAX / 2) { + over = ts.tv_sec - INT32_MAX / 2; + ts.tv_sec -= over; + } else + over = 0; + tmp = tstosbt(ts); + prec = tmp; + prec >>= tc_precexp; + if (TIMESEL(&sbt, tmp)) + sbt += tc_tick_sbt; + sbt += tmp; + error = tsleep_sbt(&nanowait[curcpu], PWAIT | PCATCH, "nanslp", + sbt, prec, C_ABSOLUTE); + } while (error == 0 && is_abs_real && td->td_rtcgen == 0); + td->td_rtcgen = 0; if (error != EWOULDBLOCK) { + TIMESEL(&sbtt, tmp); + if (sbtt >= sbt) + return (0); if (error == ERESTART) error = EINTR; - TIMESEL(&sbtt, tmp); - if (rmt != NULL) { + if ((flags & TIMER_ABSTIME) == 0 && rmt != NULL) { ts = sbttots(sbt - sbtt); ts.tv_sec += over; if (ts.tv_sec < 0) timespecclear(&ts); *rmt = ts; } - if (sbtt >= sbt) - return (0); return (error); } return (0); @@ -542,22 +600,49 @@ struct nanosleep_args { /* ARGSUSED */ int sys_nanosleep(struct thread *td, struct nanosleep_args *uap) +{ + + return (user_clock_nanosleep(td, CLOCK_REALTIME, TIMER_RELTIME, + uap->rqtp, uap->rmtp)); +} + +#ifndef _SYS_SYSPROTO_H_ +struct clock_nanosleep_args { + clockid_t clock_id; + int flags; + struct timespec *rqtp; + struct timespec *rmtp; +}; +#endif +/* ARGSUSED */ +int +sys_clock_nanosleep(struct thread *td, struct clock_nanosleep_args *uap) +{ + int error; + + error = 
user_clock_nanosleep(td, uap->clock_id, uap->flags, uap->rqtp, + uap->rmtp); + return (kern_posix_error(td, error)); +} + +static int +user_clock_nanosleep(struct thread *td, clockid_t clock_id, int flags, + const struct timespec *ua_rqtp, struct timespec *ua_rmtp) { struct timespec rmt, rqt; int error; - error = copyin(uap->rqtp, &rqt, sizeof(rqt)); + error = copyin(ua_rqtp, &rqt, sizeof(rqt)); if (error) return (error); - - if (uap->rmtp && - !useracc((caddr_t)uap->rmtp, sizeof(rmt), VM_PROT_WRITE)) - return (EFAULT); - error = kern_nanosleep(td, &rqt, &rmt); - if (error && uap->rmtp) { + if (ua_rmtp != NULL && (flags & TIMER_ABSTIME) == 0 && + !useracc(ua_rmtp, sizeof(rmt), VM_PROT_WRITE)) + return (EFAULT); + error = kern_clock_nanosleep(td, clock_id, flags, &rqt, &rmt); + if (error == EINTR && ua_rmtp != NULL && (flags & TIMER_ABSTIME) == 0) { int error2; - error2 = copyout(&rmt, uap->rmtp, sizeof(rmt)); + error2 = copyout(&rmt, ua_rmtp, sizeof(rmt)); if (error2) error = error2; } diff --git a/freebsd/sys/kern/kern_timeout.c b/freebsd/sys/kern/kern_timeout.c index 73b55338..17d23ba6 100644 --- a/freebsd/sys/kern/kern_timeout.c +++ b/freebsd/sys/kern/kern_timeout.c @@ -1345,9 +1345,12 @@ again: if (cc_exec_curr(cc, direct) == c) { /* * Succeed we to stop it or not, we must clear the - * active flag - this is what API users expect. + * active flag - this is what API users expect. If we're + * draining and the callout is currently executing, first wait + * until it finishes. 
*/ - c->c_flags &= ~CALLOUT_ACTIVE; + if ((flags & CS_DRAIN) == 0) + c->c_flags &= ~CALLOUT_ACTIVE; if ((flags & CS_DRAIN) != 0) { /* @@ -1419,6 +1422,7 @@ again: &cc->cc_lock, "codrain", 0); #endif /* __rtems__ */ } + c->c_flags &= ~CALLOUT_ACTIVE; } else if (use_lock && !cc_exec_cancel(cc, direct) && (drain == NULL)) { diff --git a/freebsd/sys/kern/subr_bus.c b/freebsd/sys/kern/subr_bus.c index e8339c93..cf94de2f 100644 --- a/freebsd/sys/kern/subr_bus.c +++ b/freebsd/sys/kern/subr_bus.c @@ -1117,7 +1117,7 @@ devclass_driver_added(devclass_t dc, driver_t *driver) int i; /* - * Call BUS_DRIVER_ADDED for any existing busses in this class. + * Call BUS_DRIVER_ADDED for any existing buses in this class. */ for (i = 0; i < dc->maxunit; i++) if (dc->devices[i] && device_is_attached(dc->devices[i])) @@ -3309,7 +3309,7 @@ resource_list_delete(struct resource_list *rl, int type, int rid) /** * @brief Allocate a reserved resource * - * This can be used by busses to force the allocation of resources + * This can be used by buses to force the allocation of resources * that are always active in the system even if they are not allocated * by a driver (e.g. PCI BARs). This function is usually called when * adding a new child to the bus. The resource is allocated from the @@ -3688,7 +3688,7 @@ bus_generic_probe(device_t dev) * only call the identify routines of eligible drivers * when this routine is called. Drivers for later * passes should have their identify routines called - * on early-pass busses during BUS_NEW_PASS(). + * on early-pass buses during BUS_NEW_PASS(). 
*/ if (dl->pass > bus_current_pass) continue; diff --git a/freebsd/sys/kern/subr_lock.c b/freebsd/sys/kern/subr_lock.c index 5aba8941..7c62bca0 100644 --- a/freebsd/sys/kern/subr_lock.c +++ b/freebsd/sys/kern/subr_lock.c @@ -58,6 +58,9 @@ __FBSDID("$FreeBSD$"); #include +SDT_PROVIDER_DEFINE(lock); +SDT_PROBE_DEFINE1(lock, , , starvation, "u_int"); + CTASSERT(LOCK_CLASS_MAX == 15); struct lock_class *lock_classes[LOCK_CLASS_MAX + 1] = { @@ -116,32 +119,56 @@ lock_destroy(struct lock_object *lock) } #ifndef __rtems__ +static SYSCTL_NODE(_debug, OID_AUTO, lock, CTLFLAG_RD, NULL, "lock debugging"); +static SYSCTL_NODE(_debug_lock, OID_AUTO, delay, CTLFLAG_RD, NULL, + "lock delay"); + +static u_int __read_mostly starvation_limit = 131072; +SYSCTL_INT(_debug_lock_delay, OID_AUTO, starvation_limit, CTLFLAG_RW, + &starvation_limit, 0, ""); + +static u_int __read_mostly restrict_starvation = 0; +SYSCTL_INT(_debug_lock_delay, OID_AUTO, restrict_starvation, CTLFLAG_RW, + &restrict_starvation, 0, ""); + void lock_delay(struct lock_delay_arg *la) { - u_int i, delay, backoff, min, max; struct lock_delay_config *lc = la->config; + u_int i; - delay = la->delay; + la->delay <<= 1; + if (__predict_false(la->delay > lc->max)) + la->delay = lc->max; - if (delay == 0) - delay = lc->initial; - else { - delay += lc->step; - max = lc->max; - if (delay > max) - delay = max; + for (i = la->delay; i > 0; i--) + cpu_spinwait(); + + la->spin_cnt += la->delay; + if (__predict_false(la->spin_cnt > starvation_limit)) { + SDT_PROBE1(lock, , , starvation, la->delay); + if (restrict_starvation) + la->delay = lc->base; } +} - backoff = cpu_ticks() % delay; - min = lc->min; - if (backoff < min) - backoff = min; - for (i = 0; i < backoff; i++) - cpu_spinwait(); +static u_int +lock_roundup_2(u_int val) +{ + u_int res; + + for (res = 1; res <= val; res <<= 1) + continue; + + return (res); +} - la->delay = delay; - la->spin_cnt += backoff; +void +lock_delay_default_init(struct lock_delay_config *lc) +{ + 
+ lc->base = lock_roundup_2(mp_ncpus) / 4; + lc->max = lc->base * 1024; } #endif /* __rtems__ */ @@ -227,7 +254,7 @@ struct lock_prof_cpu { struct lock_prof_cpu *lp_cpu[MAXCPU]; -volatile int lock_prof_enable = 0; +volatile int __read_mostly lock_prof_enable; static volatile int lock_prof_resetting; #define LPROF_SBUF_SIZE 256 @@ -669,7 +696,6 @@ out: critical_exit(); } -static SYSCTL_NODE(_debug, OID_AUTO, lock, CTLFLAG_RD, NULL, "lock debugging"); static SYSCTL_NODE(_debug_lock, OID_AUTO, prof, CTLFLAG_RD, NULL, "lock profiling"); SYSCTL_INT(_debug_lock_prof, OID_AUTO, skipspin, CTLFLAG_RW, diff --git a/freebsd/sys/kern/subr_prf.c b/freebsd/sys/kern/subr_prf.c index 3aab5aaa..ffc8165f 100644 --- a/freebsd/sys/kern/subr_prf.c +++ b/freebsd/sys/kern/subr_prf.c @@ -86,6 +86,14 @@ __FBSDID("$FreeBSD$"); #include #endif +/* + * This is needed for sbuf_putbuf() when compiled into userland. Due to the + * shared nature of this file, it's the only place to put it. + */ +#ifndef _KERNEL +#include +#endif + #ifdef _KERNEL #ifndef __rtems__ @@ -436,6 +444,23 @@ vprintf(const char *fmt, va_list ap) } #ifndef __rtems__ +static void +prf_putbuf(char *bufr, int flags, int pri) +{ + + if (flags & TOLOG) + msglogstr(bufr, pri, /*filter_cr*/1); + + if (flags & TOCONS) { + if ((panicstr == NULL) && (constty != NULL)) + msgbuf_addstr(&consmsgbuf, -1, + bufr, /*filter_cr*/ 0); + + if ((constty == NULL) ||(always_console_output)) + cnputs(bufr); + } +} + static void putbuf(int c, struct putchar_arg *ap) { @@ -457,18 +482,7 @@ putbuf(int c, struct putchar_arg *ap) /* Check if the buffer needs to be flushed. 
*/ if (ap->remain == 2 || c == '\n') { - - if (ap->flags & TOLOG) - msglogstr(ap->p_bufr, ap->pri, /*filter_cr*/1); - - if (ap->flags & TOCONS) { - if ((panicstr == NULL) && (constty != NULL)) - msgbuf_addstr(&consmsgbuf, -1, - ap->p_bufr, /*filter_cr*/ 0); - - if ((constty == NULL) ||(always_console_output)) - cnputs(ap->p_bufr); - } + prf_putbuf(ap->p_bufr, ap->flags, ap->pri); ap->p_next = ap->p_bufr; ap->remain = ap->n_bufr; @@ -1259,4 +1273,20 @@ counted_warning(unsigned *counter, const char *msg) } } #endif + +#ifdef _KERNEL +void +sbuf_putbuf(struct sbuf *sb) +{ + + prf_putbuf(sbuf_data(sb), TOLOG | TOCONS, -1); +} +#else +void +sbuf_putbuf(struct sbuf *sb) +{ + + printf("%s", sbuf_data(sb)); +} +#endif #endif /* __rtems__ */ diff --git a/freebsd/sys/kern/subr_sleepqueue.c b/freebsd/sys/kern/subr_sleepqueue.c index 90023066..f90c7102 100644 --- a/freebsd/sys/kern/subr_sleepqueue.c +++ b/freebsd/sys/kern/subr_sleepqueue.c @@ -29,7 +29,7 @@ /* * Implementation of sleep queues used to hold queue of threads blocked on - * a wait channel. Sleep queues different from turnstiles in that wait + * a wait channel. Sleep queues are different from turnstiles in that wait * channels are not owned by anyone, so there is no priority propagation. * Sleep queues can also provide a timeout and can also be interrupted by * signals. That said, there are several similarities between the turnstile @@ -39,7 +39,7 @@ * a linked list of queues. An individual queue is located by using a hash * to pick a chain, locking the chain, and then walking the chain searching * for the queue. This means that a wait channel object does not need to - * embed it's queue head just as locks do not embed their turnstile queue + * embed its queue head just as locks do not embed their turnstile queue * head. Threads also carry around a sleep queue that they lend to the * wait channel when blocking. 
Just as in turnstiles, the queue includes * a free list of the sleep queues of other threads blocked on the same @@ -81,6 +81,9 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include + +#include #include @@ -107,7 +110,7 @@ __FBSDID("$FreeBSD$"); #define SC_LOOKUP(wc) &sleepq_chains[SC_HASH(wc)] #define NR_SLEEPQS 2 /* - * There two different lists of sleep queues. Both lists are connected + * There are two different lists of sleep queues. Both lists are connected * via the sq_hash entries. The first list is the sleep queue chain list * that a sleep queue is on when it is attached to a wait channel. The * second list is the free list hung off of a sleep queue that is attached @@ -198,7 +201,7 @@ init_sleepqueue_profiling(void) for (i = 0; i < SC_TABLESIZE; i++) { snprintf(chain_name, sizeof(chain_name), "%u", i); - chain_oid = SYSCTL_ADD_NODE(NULL, + chain_oid = SYSCTL_ADD_NODE(NULL, SYSCTL_STATIC_CHILDREN(_debug_sleepq_chains), OID_AUTO, chain_name, CTLFLAG_RD, NULL, "sleepq chain stats"); SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO, @@ -233,7 +236,7 @@ init_sleepqueues(void) #else NULL, NULL, sleepq_init, NULL, UMA_ALIGN_CACHE, 0); #endif - + #ifndef __rtems__ thread0.td_sleepqueue = sleepq_alloc(); #endif /* __rtems__ */ @@ -518,6 +521,7 @@ sleepq_catch_signals(void *wchan, int pri) struct sigacts *ps; int sig, ret; + ret = 0; td = curthread; p = curproc; sc = SC_LOOKUP(wchan); @@ -531,53 +535,65 @@ sleepq_catch_signals(void *wchan, int pri) } /* - * See if there are any pending signals for this thread. If not - * we can switch immediately. Otherwise do the signal processing - * directly. + * See if there are any pending signals or suspension requests for this + * thread. If not, we can switch immediately. 
*/ thread_lock(td); - if ((td->td_flags & (TDF_NEEDSIGCHK | TDF_NEEDSUSPCHK)) == 0) { - sleepq_switch(wchan, pri); - return (0); - } - thread_unlock(td); - mtx_unlock_spin(&sc->sc_lock); - CTR3(KTR_PROC, "sleepq catching signals: thread %p (pid %ld, %s)", - (void *)td, (long)p->p_pid, td->td_name); - PROC_LOCK(p); - ps = p->p_sigacts; - mtx_lock(&ps->ps_mtx); - sig = cursig(td); - if (sig == -1) { - mtx_unlock(&ps->ps_mtx); - KASSERT((td->td_flags & TDF_SBDRY) != 0, ("lost TDF_SBDRY")); - KASSERT(TD_SBDRY_INTR(td), - ("lost TDF_SERESTART of TDF_SEINTR")); - KASSERT((td->td_flags & (TDF_SEINTR | TDF_SERESTART)) != - (TDF_SEINTR | TDF_SERESTART), - ("both TDF_SEINTR and TDF_SERESTART")); - ret = TD_SBDRY_ERRNO(td); - } else if (sig == 0) { - mtx_unlock(&ps->ps_mtx); - ret = thread_suspend_check(1); - MPASS(ret == 0 || ret == EINTR || ret == ERESTART); - } else { - if (SIGISMEMBER(ps->ps_sigintr, sig)) - ret = EINTR; - else - ret = ERESTART; - mtx_unlock(&ps->ps_mtx); + if ((td->td_flags & (TDF_NEEDSIGCHK | TDF_NEEDSUSPCHK)) != 0) { + thread_unlock(td); + mtx_unlock_spin(&sc->sc_lock); + CTR3(KTR_PROC, "sleepq catching signals: thread %p (pid %ld, %s)", + (void *)td, (long)p->p_pid, td->td_name); + PROC_LOCK(p); + /* + * Check for suspension first. Checking for signals and then + * suspending could result in a missed signal, since a signal + * can be delivered while this thread is suspended. 
+ */ + if ((td->td_flags & TDF_NEEDSUSPCHK) != 0) { + ret = thread_suspend_check(1); + MPASS(ret == 0 || ret == EINTR || ret == ERESTART); + if (ret != 0) { + PROC_UNLOCK(p); + mtx_lock_spin(&sc->sc_lock); + thread_lock(td); + goto out; + } + } + if ((td->td_flags & TDF_NEEDSIGCHK) != 0) { + ps = p->p_sigacts; + mtx_lock(&ps->ps_mtx); + sig = cursig(td); + if (sig == -1) { + mtx_unlock(&ps->ps_mtx); + KASSERT((td->td_flags & TDF_SBDRY) != 0, + ("lost TDF_SBDRY")); + KASSERT(TD_SBDRY_INTR(td), + ("lost TDF_SERESTART of TDF_SEINTR")); + KASSERT((td->td_flags & + (TDF_SEINTR | TDF_SERESTART)) != + (TDF_SEINTR | TDF_SERESTART), + ("both TDF_SEINTR and TDF_SERESTART")); + ret = TD_SBDRY_ERRNO(td); + } else if (sig != 0) { + ret = SIGISMEMBER(ps->ps_sigintr, sig) ? + EINTR : ERESTART; + mtx_unlock(&ps->ps_mtx); + } else { + mtx_unlock(&ps->ps_mtx); + } + } + /* + * Lock the per-process spinlock prior to dropping the PROC_LOCK + * to avoid a signal delivery race. PROC_LOCK, PROC_SLOCK, and + * thread_lock() are currently held in tdsendsignal(). + */ + PROC_SLOCK(p); + mtx_lock_spin(&sc->sc_lock); + PROC_UNLOCK(p); + thread_lock(td); + PROC_SUNLOCK(p); } - /* - * Lock the per-process spinlock prior to dropping the PROC_LOCK - * to avoid a signal delivery race. PROC_LOCK, PROC_SLOCK, and - * thread_lock() are currently held in tdsendsignal(). - */ - PROC_SLOCK(p); - mtx_lock_spin(&sc->sc_lock); - PROC_UNLOCK(p); - thread_lock(td); - PROC_SUNLOCK(p); if (ret == 0) { sleepq_switch(wchan, pri); return (0); @@ -616,13 +632,14 @@ sleepq_switch(void *wchan, int pri) struct sleepqueue_chain *sc; struct sleepqueue *sq; struct thread *td; + bool rtc_changed; td = curthread; sc = SC_LOOKUP(wchan); mtx_assert(&sc->sc_lock, MA_OWNED); THREAD_LOCK_ASSERT(td, MA_OWNED); - /* + /* * If we have a sleep queue, then we've already been woken up, so * just return. 
*/ @@ -635,8 +652,26 @@ sleepq_switch(void *wchan, int pri) * If TDF_TIMEOUT is set, then our sleep has been timed out * already but we are still on the sleep queue, so dequeue the * thread and return. + * + * Do the same if the real-time clock has been adjusted since this + * thread calculated its timeout based on that clock. This handles + * the following race: + * - The Ts thread needs to sleep until an absolute real-clock time. + * It copies the global rtc_generation into curthread->td_rtcgen, + * reads the RTC, and calculates a sleep duration based on that time. + * See umtxq_sleep() for an example. + * - The Tc thread adjusts the RTC, bumps rtc_generation, and wakes + * threads that are sleeping until an absolute real-clock time. + * See tc_setclock() and the POSIX specification of clock_settime(). + * - Ts reaches the code below. It holds the sleepqueue chain lock, + * so Tc has finished waking, so this thread must test td_rtcgen. + * (The declaration of td_rtcgen refers to this comment.) */ - if (td->td_flags & TDF_TIMEOUT) { + rtc_changed = td->td_rtcgen != 0 && td->td_rtcgen != rtc_generation; + if ((td->td_flags & TDF_TIMEOUT) || rtc_changed) { + if (rtc_changed) { + td->td_rtcgen = 0; + } MPASS(TD_ON_SLEEPQ(td)); sq = sleepq_lookup(wchan); if (sleepq_resume_thread(sq, td, 0)) { @@ -649,7 +684,7 @@ sleepq_switch(void *wchan, int pri) #endif } mtx_unlock_spin(&sc->sc_lock); - return; + return; } #ifdef SLEEPQUEUE_PROFILING if (prof_enabled) @@ -1126,6 +1161,13 @@ sleepq_signal(void *wchan, int flags, int pri, int queue) return (wakeup_swapper); } +static bool +match_any(struct thread *td __unused) +{ + + return (true); +} + /* * Resume all threads sleeping on a specified wait channel. 
*/ @@ -1133,8 +1175,6 @@ int sleepq_broadcast(void *wchan, int flags, int pri, int queue) { struct sleepqueue *sq; - struct thread *td, *tdn; - int wakeup_swapper; CTR2(KTR_PROC, "sleepq_broadcast(%p, %d)", wchan, flags); KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__)); @@ -1145,18 +1185,33 @@ sleepq_broadcast(void *wchan, int flags, int pri, int queue) KASSERT(sq->sq_type == (flags & SLEEPQ_TYPE), ("%s: mismatch between sleep/wakeup and cv_*", __func__)); + return (sleepq_remove_matching(sq, queue, match_any, pri)); +} + +/* + * Resume threads on the sleep queue that match the given predicate. + */ +int +sleepq_remove_matching(struct sleepqueue *sq, int queue, + bool (*matches)(struct thread *), int pri) +{ + struct thread *td, *tdn; + int wakeup_swapper; + /* - * Resume all blocked threads on the sleep queue. The last thread will - * be given ownership of sq and may re-enqueue itself before - * sleepq_resume_thread() returns, so we must cache the "next" queue - * item at the beginning of the final iteration. + * The last thread will be given ownership of sq and may + * re-enqueue itself before sleepq_resume_thread() returns, + * so we must cache the "next" queue item at the beginning + * of the final iteration. 
*/ wakeup_swapper = 0; TAILQ_FOREACH_SAFE(td, &sq->sq_blocked[queue], td_slpq, tdn) { thread_lock(td); - wakeup_swapper |= sleepq_resume_thread(sq, td, pri); + if (matches(td)) + wakeup_swapper |= sleepq_resume_thread(sq, td, pri); thread_unlock(td); } + return (wakeup_swapper); } @@ -1339,6 +1394,32 @@ sleepq_abort(struct thread *td, int intrval) } #endif /* __rtems__ */ +void +sleepq_chains_remove_matching(bool (*matches)(struct thread *)) +{ + struct sleepqueue_chain *sc; + struct sleepqueue *sq; + int i, wakeup_swapper; + + wakeup_swapper = 0; + for (sc = &sleepq_chains[0]; sc < sleepq_chains + SC_TABLESIZE; ++sc) { + if (LIST_EMPTY(&sc->sc_queues)) { + continue; + } + mtx_lock_spin(&sc->sc_lock); + LIST_FOREACH(sq, &sc->sc_queues, sq_hash) { + for (i = 0; i < NR_SLEEPQS; ++i) { + wakeup_swapper |= sleepq_remove_matching(sq, i, + matches, 0); + } + } + mtx_unlock_spin(&sc->sc_lock); + } + if (wakeup_swapper) { + kick_proc0(); + } +} + /* * Prints the stacks of all threads presently sleeping on wchan/queue to * the sbuf sb. Sets count_stacks_printed to the number of stacks actually diff --git a/freebsd/sys/kern/subr_taskqueue.c b/freebsd/sys/kern/subr_taskqueue.c index 5ef8683c..c739ccf3 100644 --- a/freebsd/sys/kern/subr_taskqueue.c +++ b/freebsd/sys/kern/subr_taskqueue.c @@ -522,6 +522,23 @@ task_is_running(struct taskqueue *queue, struct task *task) return (0); } +/* + * Only use this function in single threaded contexts. It returns + * non-zero if the given task is either pending or running. Else the + * task is idle and can be queued again or freed. 
+ */ +int +taskqueue_poll_is_busy(struct taskqueue *queue, struct task *task) +{ + int retval; + + TQ_LOCK(queue); + retval = task->ta_pending > 0 || task_is_running(queue, task); + TQ_UNLOCK(queue); + + return (retval); +} + static int taskqueue_cancel_locked(struct taskqueue *queue, struct task *task, u_int *pendp) diff --git a/freebsd/sys/kern/subr_uio.c b/freebsd/sys/kern/subr_uio.c index f5dc76e7..3a34e521 100644 --- a/freebsd/sys/kern/subr_uio.c +++ b/freebsd/sys/kern/subr_uio.c @@ -492,10 +492,11 @@ copyout_map(struct thread *td, vm_offset_t *addr, size_t sz) /* round size up to page boundary */ size = (vm_size_t)round_page(sz); - - error = vm_mmap(&vms->vm_map, addr, size, VM_PROT_READ | VM_PROT_WRITE, - VM_PROT_ALL, MAP_PRIVATE | MAP_ANON, OBJT_DEFAULT, NULL, 0); - + if (size == 0) + return (EINVAL); + error = vm_mmap_object(&vms->vm_map, addr, size, VM_PROT_READ | + VM_PROT_WRITE, VM_PROT_ALL, MAP_PRIVATE | MAP_ANON, NULL, 0, + FALSE, td); return (error); } diff --git a/freebsd/sys/kern/subr_unit.c b/freebsd/sys/kern/subr_unit.c index 678916f8..cdbe5343 100644 --- a/freebsd/sys/kern/subr_unit.c +++ b/freebsd/sys/kern/subr_unit.c @@ -218,7 +218,7 @@ ub_full(struct unrb *ub, int len) * Consistency check function. * * Checks the internal consistency as well as we can. - * + * * Called at all boundaries of this API. */ static void @@ -242,7 +242,7 @@ check_unrhdr(struct unrhdr *uh, int line) w = 0; bit_count(ub->map, 0, up->len, &w); y += w; - } else if (up->ptr != NULL) + } else if (up->ptr != NULL) y += up->len; } KASSERT (y == uh->busy, @@ -377,7 +377,7 @@ is_bitmap(struct unrhdr *uh, struct unr *up) /* * Look for sequence of items which can be combined into a bitmap, if * multiple are present, take the one which saves most memory. - * + * * Return (1) if a sequence was found to indicate that another call * might be able to do more. Return (0) if we found no suitable sequence. 
* @@ -593,7 +593,7 @@ alloc_unrl(struct unrhdr *uh) } /* - * We can always allocate from the first list element, so if we have + * We can always allocate from the first list element, so if we have * nothing on the list, we must have run out of unit numbers. */ if (up == NULL) @@ -805,7 +805,7 @@ free_unrl(struct unrhdr *uh, u_int item, void **p1, void **p2) /* Handle bitmap items */ if (is_bitmap(uh, up)) { ub = up->ptr; - + KASSERT(bit_test(ub->map, item) != 0, ("UNR: Freeing free item %d (bitmap)\n", item)); bit_clear(ub->map, item); @@ -911,7 +911,7 @@ print_unr(struct unrhdr *uh, struct unr *up) for (x = 0; x < up->len; x++) { if (bit_test(ub->map, x)) printf("#"); - else + else printf(" "); } printf("]\n"); @@ -988,7 +988,7 @@ main(int argc, char **argv) long count = 10000; /* Number of unrs to test */ long reps = 1, m; int ch; - u_int i, x, j; + u_int i, j; verbose = false; @@ -1001,7 +1001,7 @@ main(int argc, char **argv) usage(argv); exit(2); } - + break; case 'v': verbose = true; @@ -1028,7 +1028,6 @@ main(int argc, char **argv) printf("sizeof(struct unrb) %zu\n", sizeof(struct unrb)); printf("sizeof(struct unrhdr) %zu\n", sizeof(struct unrhdr)); printf("NBITS %lu\n", (unsigned long)NBITS); - x = 1; for (m = 0; m < count * reps; m++) { j = random(); i = (j >> 1) % count; diff --git a/freebsd/sys/kern/sys_generic.c b/freebsd/sys/kern/sys_generic.c index d6b3d15b..c3bf19f5 100644 --- a/freebsd/sys/kern/sys_generic.c +++ b/freebsd/sys/kern/sys_generic.c @@ -232,39 +232,37 @@ struct pread_args { }; #endif int -sys_pread(td, uap) - struct thread *td; - struct pread_args *uap; +sys_pread(struct thread *td, struct pread_args *uap) +{ + + return (kern_pread(td, uap->fd, uap->buf, uap->nbyte, uap->offset)); +} + +int +kern_pread(struct thread *td, int fd, void *buf, size_t nbyte, off_t offset) { struct uio auio; struct iovec aiov; int error; - if (uap->nbyte > IOSIZE_MAX) + if (nbyte > IOSIZE_MAX) return (EINVAL); - aiov.iov_base = uap->buf; - aiov.iov_len = 
uap->nbyte; + aiov.iov_base = buf; + aiov.iov_len = nbyte; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; - auio.uio_resid = uap->nbyte; + auio.uio_resid = nbyte; auio.uio_segflg = UIO_USERSPACE; - error = kern_preadv(td, uap->fd, &auio, uap->offset); - return(error); + error = kern_preadv(td, fd, &auio, offset); + return (error); } #if defined(COMPAT_FREEBSD6) int -freebsd6_pread(td, uap) - struct thread *td; - struct freebsd6_pread_args *uap; +freebsd6_pread(struct thread *td, struct freebsd6_pread_args *uap) { - struct pread_args oargs; - oargs.fd = uap->fd; - oargs.buf = uap->buf; - oargs.nbyte = uap->nbyte; - oargs.offset = uap->offset; - return (sys_pread(td, &oargs)); + return (kern_pread(td, uap->fd, uap->buf, uap->nbyte, uap->offset)); } #endif @@ -348,7 +346,8 @@ kern_preadv(td, fd, auio, offset) return (error); if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) error = ESPIPE; - else if (offset < 0 && fp->f_vnode->v_type != VCHR) + else if (offset < 0 && + (fp->f_vnode == NULL || fp->f_vnode->v_type != VCHR)) error = EINVAL; else error = dofileread(td, fd, fp, auio, offset, FOF_OFFSET); @@ -447,39 +446,38 @@ struct pwrite_args { }; #endif int -sys_pwrite(td, uap) - struct thread *td; - struct pwrite_args *uap; +sys_pwrite(struct thread *td, struct pwrite_args *uap) +{ + + return (kern_pwrite(td, uap->fd, uap->buf, uap->nbyte, uap->offset)); +} + +int +kern_pwrite(struct thread *td, int fd, const void *buf, size_t nbyte, + off_t offset) { struct uio auio; struct iovec aiov; int error; - if (uap->nbyte > IOSIZE_MAX) + if (nbyte > IOSIZE_MAX) return (EINVAL); - aiov.iov_base = (void *)(uintptr_t)uap->buf; - aiov.iov_len = uap->nbyte; + aiov.iov_base = (void *)(uintptr_t)buf; + aiov.iov_len = nbyte; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; - auio.uio_resid = uap->nbyte; + auio.uio_resid = nbyte; auio.uio_segflg = UIO_USERSPACE; - error = kern_pwritev(td, uap->fd, &auio, uap->offset); + error = kern_pwritev(td, fd, &auio, offset); return(error); } #if 
defined(COMPAT_FREEBSD6) int -freebsd6_pwrite(td, uap) - struct thread *td; - struct freebsd6_pwrite_args *uap; +freebsd6_pwrite(struct thread *td, struct freebsd6_pwrite_args *uap) { - struct pwrite_args oargs; - oargs.fd = uap->fd; - oargs.buf = uap->buf; - oargs.nbyte = uap->nbyte; - oargs.offset = uap->offset; - return (sys_pwrite(td, &oargs)); + return (kern_pwrite(td, uap->fd, uap->buf, uap->nbyte, uap->offset)); } #endif @@ -563,7 +561,8 @@ kern_pwritev(td, fd, auio, offset) return (error); if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) error = ESPIPE; - else if (offset < 0 && fp->f_vnode->v_type != VCHR) + else if (offset < 0 && + (fp->f_vnode == NULL || fp->f_vnode->v_type != VCHR)) error = EINVAL; else error = dofilewrite(td, fd, fp, auio, offset, FOF_OFFSET); diff --git a/freebsd/sys/kern/uipc_mbuf.c b/freebsd/sys/kern/uipc_mbuf.c index 571dd3bd..74f4f0b4 100644 --- a/freebsd/sys/kern/uipc_mbuf.c +++ b/freebsd/sys/kern/uipc_mbuf.c @@ -171,7 +171,7 @@ CTASSERT(sizeof(struct m_ext) == 28); * plain pointer does. */ #ifdef INVARIANTS -static struct mbuf m_assertbuf; +static struct mbuf __used m_assertbuf; CTASSERT(sizeof(m_assertbuf.m_slist) == sizeof(m_assertbuf.m_next)); CTASSERT(sizeof(m_assertbuf.m_stailq) == sizeof(m_assertbuf.m_next)); CTASSERT(sizeof(m_assertbuf.m_slistpkt) == sizeof(m_assertbuf.m_nextpkt)); @@ -1563,7 +1563,7 @@ m_uiotombuf(struct uio *uio, int how, int len, int align, int flags) * Copy an mbuf chain into a uio limited by len if set. */ int -m_mbuftouio(struct uio *uio, struct mbuf *m, int len) +m_mbuftouio(struct uio *uio, const struct mbuf *m, int len) { int error, length, total; int progress = 0; diff --git a/freebsd/sys/kern/uipc_mbuf2.c b/freebsd/sys/kern/uipc_mbuf2.c index fc5c8e8a..dd73910d 100644 --- a/freebsd/sys/kern/uipc_mbuf2.c +++ b/freebsd/sys/kern/uipc_mbuf2.c @@ -161,7 +161,7 @@ m_pulldown(struct mbuf *m, int off, int len, int *offp) * the target data is on . * if we got enough data on the mbuf "n", we're done. 
*/ - if ((off == 0 || offp) && len <= n->m_len - off && writable) + if ((off == 0 || offp) && len <= n->m_len - off) goto ok; /* diff --git a/freebsd/sys/kern/uipc_sockbuf.c b/freebsd/sys/kern/uipc_sockbuf.c index a7977141..b2a6460a 100644 --- a/freebsd/sys/kern/uipc_sockbuf.c +++ b/freebsd/sys/kern/uipc_sockbuf.c @@ -1049,6 +1049,11 @@ sbcut_internal(struct sockbuf *sb, int len) { struct mbuf *m, *next, *mfree; + KASSERT(len >= 0, ("%s: len is %d but it is supposed to be >= 0", + __func__, len)); + KASSERT(len <= sb->sb_ccc, ("%s: len: %d is > ccc: %u", + __func__, len, sb->sb_ccc)); + next = (m = sb->sb_mb) ? m->m_nextpkt : 0; mfree = NULL; diff --git a/freebsd/sys/kern/uipc_socket.c b/freebsd/sys/kern/uipc_socket.c index 5f01844d..59a52115 100644 --- a/freebsd/sys/kern/uipc_socket.c +++ b/freebsd/sys/kern/uipc_socket.c @@ -165,18 +165,13 @@ static void filt_sordetach(struct knote *kn); static int filt_soread(struct knote *kn, long hint); static void filt_sowdetach(struct knote *kn); static int filt_sowrite(struct knote *kn, long hint); -static int filt_solisten(struct knote *kn, long hint); static int inline hhook_run_socket(struct socket *so, void *hctx, int32_t h_id); +static int filt_soempty(struct knote *kn, long hint); #ifdef __rtems__ static #endif /* __rtems__ */ fo_kqfilter_t soo_kqfilter; -static struct filterops solisten_filtops = { - .f_isfd = 1, - .f_detach = filt_sordetach, - .f_event = filt_solisten, -}; static struct filterops soread_filtops = { .f_isfd = 1, .f_detach = filt_sordetach, @@ -187,6 +182,11 @@ static struct filterops sowrite_filtops = { .f_detach = filt_sowdetach, .f_event = filt_sowrite, }; +static struct filterops soempty_filtops = { + .f_isfd = 1, + .f_detach = filt_sowdetach, + .f_event = filt_soempty, +}; so_gen_t so_gencnt; /* generation count for sockets */ @@ -2714,6 +2714,26 @@ sosetopt(struct socket *so, struct sockopt *sopt) #endif break; + case SO_TS_CLOCK: + error = sooptcopyin(sopt, &optval, sizeof optval, + sizeof 
optval); + if (error) + goto bad; + if (optval < 0 || optval > SO_TS_CLOCK_MAX) { + error = EINVAL; + goto bad; + } + so->so_ts_clock = optval; + break; + + case SO_MAX_PACING_RATE: + error = sooptcopyin(sopt, &val32, sizeof(val32), + sizeof(val32)); + if (error) + goto bad; + so->so_max_pacing_rate = val32; + break; + default: if (V_socket_hhh[HHOOK_SOCKET_OPT]->hhh_nhooks > 0) error = hhook_run_socket(so, sopt, @@ -2901,6 +2921,14 @@ integer: optval = so->so_incqlen; goto integer; + case SO_TS_CLOCK: + optval = so->so_ts_clock; + goto integer; + + case SO_MAX_PACING_RATE: + optval = so->so_max_pacing_rate; + goto integer; + default: if (V_socket_hhh[HHOOK_SOCKET_OPT]->hhh_nhooks > 0) error = hhook_run_socket(so, sopt, @@ -3108,16 +3136,17 @@ soo_kqfilter(struct file *fp, struct knote *kn) switch (kn->kn_filter) { case EVFILT_READ: - if (so->so_options & SO_ACCEPTCONN) - kn->kn_fop = &solisten_filtops; - else - kn->kn_fop = &soread_filtops; + kn->kn_fop = &soread_filtops; sb = &so->so_rcv; break; case EVFILT_WRITE: kn->kn_fop = &sowrite_filtops; sb = &so->so_snd; break; + case EVFILT_EMPTY: + kn->kn_fop = &soempty_filtops; + sb = &so->so_snd; + break; default: return (EINVAL); } @@ -3327,6 +3356,11 @@ filt_soread(struct knote *kn, long hint) struct socket *so; so = kn->kn_fp->f_data; + if (so->so_options & SO_ACCEPTCONN) { + kn->kn_data = so->so_qlen; + return (!TAILQ_EMPTY(&so->so_comp)); + + } SOCKBUF_LOCK_ASSERT(&so->so_rcv); kn->kn_data = sbavail(&so->so_rcv) - so->so_rcv.sb_ctl; @@ -3339,11 +3373,9 @@ filt_soread(struct knote *kn, long hint) if (kn->kn_sfflags & NOTE_LOWAT) { if (kn->kn_data >= kn->kn_sdata) - return 1; - } else { - if (sbavail(&so->so_rcv) >= so->so_rcv.sb_lowat) - return 1; - } + return (1); + } else if (sbavail(&so->so_rcv) >= so->so_rcv.sb_lowat) + return (1); /* This hook returning non-zero indicates an event, not error */ return (hhook_run_socket(so, NULL, HHOOK_FILT_SOREAD)); @@ -3388,14 +3420,19 @@ filt_sowrite(struct knote *kn, long 
hint) return (kn->kn_data >= so->so_snd.sb_lowat); } -/*ARGSUSED*/ static int -filt_solisten(struct knote *kn, long hint) +filt_soempty(struct knote *kn, long hint) { - struct socket *so = kn->kn_fp->f_data; + struct socket *so; + + so = kn->kn_fp->f_data; + SOCKBUF_LOCK_ASSERT(&so->so_snd); + kn->kn_data = sbused(&so->so_snd); - kn->kn_data = so->so_qlen; - return (!TAILQ_EMPTY(&so->so_comp)); + if (kn->kn_data == 0) + return (1); + else + return (0); } #ifndef __rtems__ diff --git a/freebsd/sys/kern/uipc_syscalls.c b/freebsd/sys/kern/uipc_syscalls.c index c6fdb26f..41701401 100644 --- a/freebsd/sys/kern/uipc_syscalls.c +++ b/freebsd/sys/kern/uipc_syscalls.c @@ -166,14 +166,20 @@ static #endif /* __rtems__ */ int sys_socket(struct thread *td, struct socket_args *uap) +{ + + return (kern_socket(td, uap->domain, uap->type, uap->protocol)); +} + +int +kern_socket(struct thread *td, int domain, int type, int protocol) { struct socket *so; struct file *fp; - int fd, error, type, oflag, fflag; + int fd, error, oflag, fflag; - AUDIT_ARG_SOCKET(uap->domain, uap->type, uap->protocol); + AUDIT_ARG_SOCKET(domain, type, protocol); - type = uap->type; oflag = 0; fflag = 0; if ((type & SOCK_CLOEXEC) != 0) { @@ -186,8 +192,7 @@ sys_socket(struct thread *td, struct socket_args *uap) } #ifdef MAC - error = mac_socket_check_create(td->td_ucred, uap->domain, type, - uap->protocol); + error = mac_socket_check_create(td->td_ucred, domain, type, protocol); if (error != 0) return (error); #endif @@ -195,8 +200,7 @@ sys_socket(struct thread *td, struct socket_args *uap) if (error != 0) return (error); /* An extra reference on `fp' has been held for us by falloc(). 
*/ - error = socreate(uap->domain, &so, type, uap->protocol, - td->td_ucred, td); + error = socreate(domain, &so, type, protocol, td->td_ucred, td); if (error != 0) { fdclose(td, fp, fd); } else { @@ -328,14 +332,21 @@ sys_bindat(struct thread *td, struct bindat_args *uap) int sys_listen(struct thread *td, struct listen_args *uap) +{ + + return (kern_listen(td, uap->s, uap->backlog)); +} + +int +kern_listen(struct thread *td, int s, int backlog) { struct socket *so; struct file *fp; cap_rights_t rights; int error; - AUDIT_ARG_FD(uap->s); - error = getsock_cap(td, uap->s, cap_rights_init(&rights, CAP_LISTEN), + AUDIT_ARG_FD(s); + error = getsock_cap(td, s, cap_rights_init(&rights, CAP_LISTEN), &fp, NULL, NULL); if (error == 0) { so = fp->f_data; @@ -343,10 +354,10 @@ sys_listen(struct thread *td, struct listen_args *uap) error = mac_socket_check_listen(td->td_ucred, so); if (error == 0) #endif - error = solisten(so, uap->backlog, td); + error = solisten(so, backlog, td); fdrop(fp, td); } - return(error); + return (error); } #ifdef __rtems__ int @@ -1580,18 +1591,25 @@ static #endif /* __rtems__ */ int sys_shutdown(struct thread *td, struct shutdown_args *uap) +{ + + return (kern_shutdown(td, uap->s, uap->how)); +} + +int +kern_shutdown(struct thread *td, int s, int how) { struct socket *so; struct file *fp; cap_rights_t rights; int error; - AUDIT_ARG_FD(uap->s); - error = getsock_cap(td, uap->s, cap_rights_init(&rights, CAP_SHUTDOWN), + AUDIT_ARG_FD(s); + error = getsock_cap(td, s, cap_rights_init(&rights, CAP_SHUTDOWN), &fp, NULL, NULL); if (error == 0) { so = fp->f_data; - error = soshutdown(so, uap->how); + error = soshutdown(so, how); #ifndef __rtems__ /* * Previous versions did not return ENOTCONN, but 0 in diff --git a/freebsd/sys/kern/uipc_usrreq.c b/freebsd/sys/kern/uipc_usrreq.c index 159de132..b3b55402 100644 --- a/freebsd/sys/kern/uipc_usrreq.c +++ b/freebsd/sys/kern/uipc_usrreq.c @@ -842,6 +842,9 @@ uipc_listen(struct socket *so, int backlog, struct 
thread *td) struct unpcb *unp; int error; + if (so->so_type != SOCK_STREAM && so->so_type != SOCK_SEQPACKET) + return (EOPNOTSUPP); + unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_listen: unp == NULL")); @@ -2058,6 +2061,7 @@ unp_internalize(struct mbuf **controlp, struct thread *td) struct filedescent *fde, **fdep, *fdev; struct file *fp; struct timeval *tv; + struct timespec *ts; int i, *fdp; void *data; socklen_t clen = control->m_len, datalen; @@ -2178,6 +2182,30 @@ unp_internalize(struct mbuf **controlp, struct thread *td) bintime(bt); break; + case SCM_REALTIME: + *controlp = sbcreatecontrol(NULL, sizeof(*ts), + SCM_REALTIME, SOL_SOCKET); + if (*controlp == NULL) { + error = ENOBUFS; + goto out; + } + ts = (struct timespec *) + CMSG_DATA(mtod(*controlp, struct cmsghdr *)); + nanotime(ts); + break; + + case SCM_MONOTONIC: + *controlp = sbcreatecontrol(NULL, sizeof(*ts), + SCM_MONOTONIC, SOL_SOCKET); + if (*controlp == NULL) { + error = ENOBUFS; + goto out; + } + ts = (struct timespec *) + CMSG_DATA(mtod(*controlp, struct cmsghdr *)); + nanouptime(ts); + break; + default: error = EINVAL; goto out; diff --git a/freebsd/sys/libkern/bcd.c b/freebsd/sys/libkern/bcd.c index df4abad2..38e7d0c1 100644 --- a/freebsd/sys/libkern/bcd.c +++ b/freebsd/sys/libkern/bcd.c @@ -8,6 +8,7 @@ #include __FBSDID("$FreeBSD$"); +#include #include u_char const bcd2bin_data[] = { @@ -22,6 +23,7 @@ u_char const bcd2bin_data[] = { 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 0, 0, 0, 0, 0, 0, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99 }; +CTASSERT(nitems(bcd2bin_data) == LIBKERN_LEN_BCD2BIN); u_char const bin2bcd_data[] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, @@ -35,6 +37,8 @@ u_char const bin2bcd_data[] = { 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99 }; +CTASSERT(nitems(bin2bcd_data) == LIBKERN_LEN_BIN2BCD); /* This is actually used with radix [2..36] */ char const hex2ascii_data[] = 
"0123456789abcdefghijklmnopqrstuvwxyz"; +CTASSERT(nitems(hex2ascii_data) == LIBKERN_LEN_HEX2ASCII + 1); diff --git a/freebsd/sys/libkern/inet_ntoa.c b/freebsd/sys/libkern/inet_ntoa.c index 1d36ab29..1aa48077 100644 --- a/freebsd/sys/libkern/inet_ntoa.c +++ b/freebsd/sys/libkern/inet_ntoa.c @@ -37,20 +37,6 @@ __FBSDID("$FreeBSD$"); #include -char * -inet_ntoa(struct in_addr ina) -{ - static char buf[4*sizeof "123"]; - unsigned char *ucp = (unsigned char *)&ina; - - sprintf(buf, "%d.%d.%d.%d", - ucp[0] & 0xff, - ucp[1] & 0xff, - ucp[2] & 0xff, - ucp[3] & 0xff); - return buf; -} - char * inet_ntoa_r(struct in_addr ina, char *buf) { diff --git a/freebsd/sys/libkern/random.c b/freebsd/sys/libkern/random.c index 5b780670..efa9a70a 100644 --- a/freebsd/sys/libkern/random.c +++ b/freebsd/sys/libkern/random.c @@ -12,7 +12,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * @@ -59,7 +59,7 @@ srandom(seed) u_long random() { - register long x, hi, lo, t; + long x, hi, lo, t; /* * Compute x[n + 1] = (7^5 * x[n]) mod (2^31 - 1). diff --git a/freebsd/sys/net/bpf.c b/freebsd/sys/net/bpf.c index e7822586..41e22db7 100644 --- a/freebsd/sys/net/bpf.c +++ b/freebsd/sys/net/bpf.c @@ -17,7 +17,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * @@ -2782,6 +2782,10 @@ bpf_ifdetach(void *arg __unused, struct ifnet *ifp) struct bpf_if *bp, *bp_temp; int nmatched = 0; + /* Ignore ifnet renaming. */ + if (ifp->if_flags & IFF_RENAMING) + return; + BPF_LOCK(); /* * Find matching entries in free list. diff --git a/freebsd/sys/net/bpf.h b/freebsd/sys/net/bpf.h index f707f436..e82ce184 100644 --- a/freebsd/sys/net/bpf.h +++ b/freebsd/sys/net/bpf.h @@ -15,7 +15,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * @@ -234,1056 +234,8 @@ struct bpf_zbuf_header { u_int _bzh_pad[5]; }; -/* - * Data-link level type codes. - */ -#define DLT_NULL 0 /* BSD loopback encapsulation */ -#define DLT_EN10MB 1 /* Ethernet (10Mb) */ -#define DLT_EN3MB 2 /* Experimental Ethernet (3Mb) */ -#define DLT_AX25 3 /* Amateur Radio AX.25 */ -#define DLT_PRONET 4 /* Proteon ProNET Token Ring */ -#define DLT_CHAOS 5 /* Chaos */ -#define DLT_IEEE802 6 /* IEEE 802 Networks */ -#define DLT_ARCNET 7 /* ARCNET */ -#define DLT_SLIP 8 /* Serial Line IP */ -#define DLT_PPP 9 /* Point-to-point Protocol */ -#define DLT_FDDI 10 /* FDDI */ -#define DLT_ATM_RFC1483 11 /* LLC/SNAP encapsulated atm */ -#define DLT_RAW 12 /* raw IP */ - -/* - * These are values from BSD/OS's "bpf.h". 
- * These are not the same as the values from the traditional libpcap - * "bpf.h"; however, these values shouldn't be generated by any - * OS other than BSD/OS, so the correct values to use here are the - * BSD/OS values. - * - * Platforms that have already assigned these values to other - * DLT_ codes, however, should give these codes the values - * from that platform, so that programs that use these codes will - * continue to compile - even though they won't correctly read - * files of these types. - */ -#define DLT_SLIP_BSDOS 15 /* BSD/OS Serial Line IP */ -#define DLT_PPP_BSDOS 16 /* BSD/OS Point-to-point Protocol */ - -#define DLT_ATM_CLIP 19 /* Linux Classical-IP over ATM */ - -/* - * These values are defined by NetBSD; other platforms should refrain from - * using them for other purposes, so that NetBSD savefiles with link - * types of 50 or 51 can be read as this type on all platforms. - */ -#define DLT_PPP_SERIAL 50 /* PPP over serial with HDLC encapsulation */ -#define DLT_PPP_ETHER 51 /* PPP over Ethernet */ - -/* - * Reserved for the Symantec Enterprise Firewall. - */ -#define DLT_SYMANTEC_FIREWALL 99 - -/* - * Values between 100 and 103 are used in capture file headers as - * link-layer header type LINKTYPE_ values corresponding to DLT_ types - * that differ between platforms; don't use those values for new DLT_ - * new types. - */ - -/* - * Values starting with 104 are used for newly-assigned link-layer - * header type values; for those link-layer header types, the DLT_ - * value returned by pcap_datalink() and passed to pcap_open_dead(), - * and the LINKTYPE_ value that appears in capture files, are the - * same. - * - * DLT_MATCHING_MIN is the lowest such value; DLT_MATCHING_MAX is - * the highest such value. 
- */ -#define DLT_MATCHING_MIN 104 - -/* - * This value was defined by libpcap 0.5; platforms that have defined - * it with a different value should define it here with that value - - * a link type of 104 in a save file will be mapped to DLT_C_HDLC, - * whatever value that happens to be, so programs will correctly - * handle files with that link type regardless of the value of - * DLT_C_HDLC. - * - * The name DLT_C_HDLC was used by BSD/OS; we use that name for source - * compatibility with programs written for BSD/OS. - * - * libpcap 0.5 defined it as DLT_CHDLC; we define DLT_CHDLC as well, - * for source compatibility with programs written for libpcap 0.5. - */ -#define DLT_C_HDLC 104 /* Cisco HDLC */ -#define DLT_CHDLC DLT_C_HDLC - -#define DLT_IEEE802_11 105 /* IEEE 802.11 wireless */ - -/* - * Values between 106 and 107 are used in capture file headers as - * link-layer types corresponding to DLT_ types that might differ - * between platforms; don't use those values for new DLT_ new types. - */ - -/* - * Frame Relay; BSD/OS has a DLT_FR with a value of 11, but that collides - * with other values. - * DLT_FR and DLT_FRELAY packets start with the Q.922 Frame Relay header - * (DLCI, etc.). - */ -#define DLT_FRELAY 107 - -/* - * OpenBSD DLT_LOOP, for loopback devices; it's like DLT_NULL, except - * that the AF_ type in the link-layer header is in network byte order. - * - * OpenBSD defines it as 12, but that collides with DLT_RAW, so we - * define it as 108 here. If OpenBSD picks up this file, it should - * define DLT_LOOP as 12 in its version, as per the comment above - - * and should not use 108 as a DLT_ value. - */ -#define DLT_LOOP 108 - -/* - * Values between 109 and 112 are used in capture file headers as - * link-layer types corresponding to DLT_ types that might differ - * between platforms; don't use those values for new DLT_ new types. 
- */ - -/* - * Encapsulated packets for IPsec; DLT_ENC is 13 in OpenBSD, but that's - * DLT_SLIP_BSDOS in NetBSD, so we don't use 13 for it in OSes other - * than OpenBSD. - */ -#define DLT_ENC 109 - -/* - * This is for Linux cooked sockets. - */ -#define DLT_LINUX_SLL 113 - -/* - * Apple LocalTalk hardware. - */ -#define DLT_LTALK 114 - -/* - * Acorn Econet. - */ -#define DLT_ECONET 115 - -/* - * Reserved for use with OpenBSD ipfilter. - */ -#define DLT_IPFILTER 116 - -/* - * Reserved for use in capture-file headers as a link-layer type - * corresponding to OpenBSD DLT_PFLOG; DLT_PFLOG is 17 in OpenBSD, - * but that's DLT_LANE8023 in SuSE 6.3, so we can't use 17 for it - * in capture-file headers. - */ -#define DLT_PFLOG 117 - -/* - * Registered for Cisco-internal use. - */ -#define DLT_CISCO_IOS 118 - -/* - * Reserved for 802.11 cards using the Prism II chips, with a link-layer - * header including Prism monitor mode information plus an 802.11 - * header. - */ -#define DLT_PRISM_HEADER 119 - -/* - * Reserved for Aironet 802.11 cards, with an Aironet link-layer header - * (see Doug Ambrisko's FreeBSD patches). - */ -#define DLT_AIRONET_HEADER 120 - -/* - * Reserved for use by OpenBSD's pfsync device. - */ -#define DLT_PFSYNC 121 - -/* - * Reserved for Siemens HiPath HDLC. XXX - */ -#define DLT_HHDLC 121 - -/* - * Reserved for RFC 2625 IP-over-Fibre Channel. - */ -#define DLT_IP_OVER_FC 122 - -/* - * Reserved for Full Frontal ATM on Solaris. - */ -#define DLT_SUNATM 123 - -/* - * Reserved as per request from Kent Dahlgren - * for private use. - */ -#define DLT_RIO 124 /* RapidIO */ -#define DLT_PCI_EXP 125 /* PCI Express */ -#define DLT_AURORA 126 /* Xilinx Aurora link layer */ - -/* - * BSD header for 802.11 plus a number of bits of link-layer information - * including radio information. - */ -#ifndef DLT_IEEE802_11_RADIO -#define DLT_IEEE802_11_RADIO 127 -#endif - -/* - * Reserved for TZSP encapsulation. 
- */ -#define DLT_TZSP 128 /* Tazmen Sniffer Protocol */ - -/* - * Reserved for Linux ARCNET. - */ -#define DLT_ARCNET_LINUX 129 - -/* - * Juniper-private data link types. - */ -#define DLT_JUNIPER_MLPPP 130 -#define DLT_JUNIPER_MLFR 131 -#define DLT_JUNIPER_ES 132 -#define DLT_JUNIPER_GGSN 133 -#define DLT_JUNIPER_MFR 134 -#define DLT_JUNIPER_ATM2 135 -#define DLT_JUNIPER_SERVICES 136 -#define DLT_JUNIPER_ATM1 137 - -/* - * Apple IP-over-IEEE 1394, as per a request from Dieter Siegmund - * . The header that's presented is an Ethernet-like - * header: - * - * #define FIREWIRE_EUI64_LEN 8 - * struct firewire_header { - * u_char firewire_dhost[FIREWIRE_EUI64_LEN]; - * u_char firewire_shost[FIREWIRE_EUI64_LEN]; - * u_short firewire_type; - * }; - * - * with "firewire_type" being an Ethernet type value, rather than, - * for example, raw GASP frames being handed up. - */ -#define DLT_APPLE_IP_OVER_IEEE1394 138 - -/* - * Various SS7 encapsulations, as per a request from Jeff Morriss - * and subsequent discussions. - */ -#define DLT_MTP2_WITH_PHDR 139 /* pseudo-header with various info, followed by MTP2 */ -#define DLT_MTP2 140 /* MTP2, without pseudo-header */ -#define DLT_MTP3 141 /* MTP3, without pseudo-header or MTP2 */ -#define DLT_SCCP 142 /* SCCP, without pseudo-header or MTP2 or MTP3 */ - -/* - * Reserved for DOCSIS. - */ -#define DLT_DOCSIS 143 - -/* - * Reserved for Linux IrDA. - */ -#define DLT_LINUX_IRDA 144 - -/* - * Reserved for IBM SP switch and IBM Next Federation switch. - */ -#define DLT_IBM_SP 145 -#define DLT_IBM_SN 146 - -/* - * Reserved for private use. If you have some link-layer header type - * that you want to use within your organization, with the capture files - * using that link-layer header type not ever be sent outside your - * organization, you can use these values. - * - * No libpcap release will use these for any purpose, nor will any - * tcpdump release use them, either. 
- * - * Do *NOT* use these in capture files that you expect anybody not using - * your private versions of capture-file-reading tools to read; in - * particular, do *NOT* use them in products, otherwise you may find that - * people won't be able to use tcpdump, or snort, or Ethereal, or... to - * read capture files from your firewall/intrusion detection/traffic - * monitoring/etc. appliance, or whatever product uses that DLT_ value, - * and you may also find that the developers of those applications will - * not accept patches to let them read those files. - * - * Also, do not use them if somebody might send you a capture using them - * for *their* private type and tools using them for *your* private type - * would have to read them. - * - * Instead, ask "tcpdump-workers@tcpdump.org" for a new DLT_ value, - * as per the comment above, and use the type you're given. - */ -#define DLT_USER0 147 -#define DLT_USER1 148 -#define DLT_USER2 149 -#define DLT_USER3 150 -#define DLT_USER4 151 -#define DLT_USER5 152 -#define DLT_USER6 153 -#define DLT_USER7 154 -#define DLT_USER8 155 -#define DLT_USER9 156 -#define DLT_USER10 157 -#define DLT_USER11 158 -#define DLT_USER12 159 -#define DLT_USER13 160 -#define DLT_USER14 161 -#define DLT_USER15 162 - -/* - * For future use with 802.11 captures - defined by AbsoluteValue - * Systems to store a number of bits of link-layer information - * including radio information: - * - * http://www.shaftnet.org/~pizza/software/capturefrm.txt - * - * but it might be used by some non-AVS drivers now or in the - * future. - */ -#define DLT_IEEE802_11_RADIO_AVS 163 /* 802.11 plus AVS radio header */ - -/* - * Juniper-private data link type, as per request from - * Hannes Gredler . The DLT_s are used - * for passing on chassis-internal metainformation such as - * QOS profiles, etc.. - */ -#define DLT_JUNIPER_MONITOR 164 - -/* - * Reserved for BACnet MS/TP. 
- */ -#define DLT_BACNET_MS_TP 165 - -/* - * Another PPP variant as per request from Karsten Keil . - * - * This is used in some OSes to allow a kernel socket filter to distinguish - * between incoming and outgoing packets, on a socket intended to - * supply pppd with outgoing packets so it can do dial-on-demand and - * hangup-on-lack-of-demand; incoming packets are filtered out so they - * don't cause pppd to hold the connection up (you don't want random - * input packets such as port scans, packets from old lost connections, - * etc. to force the connection to stay up). - * - * The first byte of the PPP header (0xff03) is modified to accommodate - * the direction - 0x00 = IN, 0x01 = OUT. - */ -#define DLT_PPP_PPPD 166 - -/* - * Names for backwards compatibility with older versions of some PPP - * software; new software should use DLT_PPP_PPPD. - */ -#define DLT_PPP_WITH_DIRECTION DLT_PPP_PPPD -#define DLT_LINUX_PPP_WITHDIRECTION DLT_PPP_PPPD - -/* - * Juniper-private data link type, as per request from - * Hannes Gredler . The DLT_s are used - * for passing on chassis-internal metainformation such as - * QOS profiles, cookies, etc.. - */ -#define DLT_JUNIPER_PPPOE 167 -#define DLT_JUNIPER_PPPOE_ATM 168 - -#define DLT_GPRS_LLC 169 /* GPRS LLC */ -#define DLT_GPF_T 170 /* GPF-T (ITU-T G.7041/Y.1303) */ -#define DLT_GPF_F 171 /* GPF-F (ITU-T G.7041/Y.1303) */ - -/* - * Requested by Oolan Zimmer for use in Gcom's T1/E1 line - * monitoring equipment. - */ -#define DLT_GCOM_T1E1 172 -#define DLT_GCOM_SERIAL 173 - -/* - * Juniper-private data link type, as per request from - * Hannes Gredler . The DLT_ is used - * for internal communication to Physical Interface Cards (PIC) - */ -#define DLT_JUNIPER_PIC_PEER 174 - -/* - * Link types requested by Gregor Maier of Endace - * Measurement Systems. They add an ERF header (see - * http://www.endace.com/support/EndaceRecordFormat.pdf) in front of - * the link-layer header. 
- */ -#define DLT_ERF_ETH 175 /* Ethernet */ -#define DLT_ERF_POS 176 /* Packet-over-SONET */ - -/* - * Requested by Daniele Orlandi for raw LAPD - * for vISDN (http://www.orlandi.com/visdn/). Its link-layer header - * includes additional information before the LAPD header, so it's - * not necessarily a generic LAPD header. - */ -#define DLT_LINUX_LAPD 177 - -/* - * Juniper-private data link type, as per request from - * Hannes Gredler . - * The DLT_ are used for prepending meta-information - * like interface index, interface name - * before standard Ethernet, PPP, Frelay & C-HDLC Frames - */ -#define DLT_JUNIPER_ETHER 178 -#define DLT_JUNIPER_PPP 179 -#define DLT_JUNIPER_FRELAY 180 -#define DLT_JUNIPER_CHDLC 181 - -/* - * Multi Link Frame Relay (FRF.16) - */ -#define DLT_MFR 182 - -/* - * Juniper-private data link type, as per request from - * Hannes Gredler . - * The DLT_ is used for internal communication with a - * voice Adapter Card (PIC) - */ -#define DLT_JUNIPER_VP 183 - -/* - * Arinc 429 frames. - * DLT_ requested by Gianluca Varenni . - * Every frame contains a 32bit A429 label. - * More documentation on Arinc 429 can be found at - * http://www.condoreng.com/support/downloads/tutorials/ARINCTutorial.pdf - */ -#define DLT_A429 184 - -/* - * Arinc 653 Interpartition Communication messages. - * DLT_ requested by Gianluca Varenni . - * Please refer to the A653-1 standard for more information. - */ -#define DLT_A653_ICM 185 - -/* - * USB packets, beginning with a USB setup header; requested by - * Paolo Abeni . - */ -#define DLT_USB 186 - -/* - * Bluetooth HCI UART transport layer (part H:4); requested by - * Paolo Abeni. - */ -#define DLT_BLUETOOTH_HCI_H4 187 - -/* - * IEEE 802.16 MAC Common Part Sublayer; requested by Maria Cruz - * . - */ -#define DLT_IEEE802_16_MAC_CPS 188 - -/* - * USB packets, beginning with a Linux USB header; requested by - * Paolo Abeni . - */ -#define DLT_USB_LINUX 189 - -/* - * Controller Area Network (CAN) v. 2.0B packets. 
- * DLT_ requested by Gianluca Varenni . - * Used to dump CAN packets coming from a CAN Vector board. - * More documentation on the CAN v2.0B frames can be found at - * http://www.can-cia.org/downloads/?269 - */ -#define DLT_CAN20B 190 - -/* - * IEEE 802.15.4, with address fields padded, as is done by Linux - * drivers; requested by Juergen Schimmer. - */ -#define DLT_IEEE802_15_4_LINUX 191 - -/* - * Per Packet Information encapsulated packets. - * DLT_ requested by Gianluca Varenni . - */ -#define DLT_PPI 192 - -/* - * Header for 802.16 MAC Common Part Sublayer plus a radiotap radio header; - * requested by Charles Clancy. - */ -#define DLT_IEEE802_16_MAC_CPS_RADIO 193 - -/* - * Juniper-private data link type, as per request from - * Hannes Gredler . - * The DLT_ is used for internal communication with a - * integrated service module (ISM). - */ -#define DLT_JUNIPER_ISM 194 - -/* - * IEEE 802.15.4, exactly as it appears in the spec (no padding, no - * nothing); requested by Mikko Saarnivala . - */ -#define DLT_IEEE802_15_4 195 - -/* - * Various link-layer types, with a pseudo-header, for SITA - * (http://www.sita.aero/); requested by Fulko Hew (fulko.hew@gmail.com). - */ -#define DLT_SITA 196 - -/* - * Various link-layer types, with a pseudo-header, for Endace DAG cards; - * encapsulates Endace ERF records. Requested by Stephen Donnelly - * . - */ -#define DLT_ERF 197 - -/* - * Special header prepended to Ethernet packets when capturing from a - * u10 Networks board. Requested by Phil Mulholland - * . - */ -#define DLT_RAIF1 198 - -/* - * IPMB packet for IPMI, beginning with the I2C slave address, followed - * by the netFn and LUN, etc.. Requested by Chanthy Toeung - * . - */ -#define DLT_IPMB 199 - -/* - * Juniper-private data link type, as per request from - * Hannes Gredler . - * The DLT_ is used for capturing data on a secure tunnel interface. 
- */ -#define DLT_JUNIPER_ST 200 - -/* - * Bluetooth HCI UART transport layer (part H:4), with pseudo-header - * that includes direction information; requested by Paolo Abeni. - */ -#define DLT_BLUETOOTH_HCI_H4_WITH_PHDR 201 - -/* - * AX.25 packet with a 1-byte KISS header; see - * - * http://www.ax25.net/kiss.htm - * - * as per Richard Stearn . - */ -#define DLT_AX25_KISS 202 - -/* - * LAPD packets from an ISDN channel, starting with the address field, - * with no pseudo-header. - * Requested by Varuna De Silva . - */ -#define DLT_LAPD 203 - -/* - * Variants of various link-layer headers, with a one-byte direction - * pseudo-header prepended - zero means "received by this host", - * non-zero (any non-zero value) means "sent by this host" - as per - * Will Barker . - */ -#define DLT_PPP_WITH_DIR 204 /* PPP - don't confuse with DLT_PPP_WITH_DIRECTION */ -#define DLT_C_HDLC_WITH_DIR 205 /* Cisco HDLC */ -#define DLT_FRELAY_WITH_DIR 206 /* Frame Relay */ -#define DLT_LAPB_WITH_DIR 207 /* LAPB */ - -/* - * 208 is reserved for an as-yet-unspecified proprietary link-layer - * type, as requested by Will Barker. - */ - -/* - * IPMB with a Linux-specific pseudo-header; as requested by Alexey Neyman - * . - */ -#define DLT_IPMB_LINUX 209 - -/* - * FlexRay automotive bus - http://www.flexray.com/ - as requested - * by Hannes Kaelber . - */ -#define DLT_FLEXRAY 210 - -/* - * Media Oriented Systems Transport (MOST) bus for multimedia - * transport - http://www.mostcooperation.com/ - as requested - * by Hannes Kaelber . - */ -#define DLT_MOST 211 - -/* - * Local Interconnect Network (LIN) bus for vehicle networks - - * http://www.lin-subbus.org/ - as requested by Hannes Kaelber - * . - */ -#define DLT_LIN 212 - -/* - * X2E-private data link type used for serial line capture, - * as requested by Hannes Kaelber . - */ -#define DLT_X2E_SERIAL 213 - -/* - * X2E-private data link type used for the Xoraya data logger - * family, as requested by Hannes Kaelber . 
- */ -#define DLT_X2E_XORAYA 214 - -/* - * IEEE 802.15.4, exactly as it appears in the spec (no padding, no - * nothing), but with the PHY-level data for non-ASK PHYs (4 octets - * of 0 as preamble, one octet of SFD, one octet of frame length+ - * reserved bit, and then the MAC-layer data, starting with the - * frame control field). - * - * Requested by Max Filippov . - */ -#define DLT_IEEE802_15_4_NONASK_PHY 215 - -/* - * David Gibson requested this for - * captures from the Linux kernel /dev/input/eventN devices. This - * is used to communicate keystrokes and mouse movements from the - * Linux kernel to display systems, such as Xorg. - */ -#define DLT_LINUX_EVDEV 216 - -/* - * GSM Um and Abis interfaces, preceded by a "gsmtap" header. - * - * Requested by Harald Welte . - */ -#define DLT_GSMTAP_UM 217 -#define DLT_GSMTAP_ABIS 218 - -/* - * MPLS, with an MPLS label as the link-layer header. - * Requested by Michele Marchetto on behalf - * of OpenBSD. - */ -#define DLT_MPLS 219 - -/* - * USB packets, beginning with a Linux USB header, with the USB header - * padded to 64 bytes; required for memory-mapped access. - */ -#define DLT_USB_LINUX_MMAPPED 220 - -/* - * DECT packets, with a pseudo-header; requested by - * Matthias Wenzel . - */ -#define DLT_DECT 221 -/* - * From: "Lidwa, Eric (GSFC-582.0)[SGT INC]" - * Date: Mon, 11 May 2009 11:18:30 -0500 - * - * DLT_AOS. We need it for AOS Space Data Link Protocol. - * I have already written dissectors for but need an OK from - * legal before I can submit a patch. - * - */ -#define DLT_AOS 222 - -/* - * Wireless HART (Highway Addressable Remote Transducer) - * From the HART Communication Foundation - * IES/PAS 62591 - * - * Requested by Sam Roberts . - */ -#define DLT_WIHART 223 - -/* - * Fibre Channel FC-2 frames, beginning with a Frame_Header. - * Requested by Kahou Lei . - */ -#define DLT_FC_2 224 - -/* - * Fibre Channel FC-2 frames, beginning with an encoding of the - * SOF, and ending with an encoding of the EOF. 
- * - * The encodings represent the frame delimiters as 4-byte sequences - * representing the corresponding ordered sets, with K28.5 - * represented as 0xBC, and the D symbols as the corresponding - * byte values; for example, SOFi2, which is K28.5 - D21.5 - D1.2 - D21.2, - * is represented as 0xBC 0xB5 0x55 0x55. - * - * Requested by Kahou Lei . - */ -#define DLT_FC_2_WITH_FRAME_DELIMS 225 -/* - * Solaris ipnet pseudo-header; requested by Darren Reed . - * - * The pseudo-header starts with a one-byte version number; for version 2, - * the pseudo-header is: - * - * struct dl_ipnetinfo { - * u_int8_t dli_version; - * u_int8_t dli_family; - * u_int16_t dli_htype; - * u_int32_t dli_pktlen; - * u_int32_t dli_ifindex; - * u_int32_t dli_grifindex; - * u_int32_t dli_zsrc; - * u_int32_t dli_zdst; - * }; - * - * dli_version is 2 for the current version of the pseudo-header. - * - * dli_family is a Solaris address family value, so it's 2 for IPv4 - * and 26 for IPv6. - * - * dli_htype is a "hook type" - 0 for incoming packets, 1 for outgoing - * packets, and 2 for packets arriving from another zone on the same - * machine. - * - * dli_pktlen is the length of the packet data following the pseudo-header - * (so the captured length minus dli_pktlen is the length of the - * pseudo-header, assuming the entire pseudo-header was captured). - * - * dli_ifindex is the interface index of the interface on which the - * packet arrived. - * - * dli_grifindex is the group interface index number (for IPMP interfaces). - * - * dli_zsrc is the zone identifier for the source of the packet. - * - * dli_zdst is the zone identifier for the destination of the packet. - * - * A zone number of 0 is the global zone; a zone number of 0xffffffff - * means that the packet arrived from another host on the network, not - * from another zone on the same machine. - * - * An IPv4 or IPv6 datagram follows the pseudo-header; dli_family indicates - * which of those it is. 
- */ -#define DLT_IPNET 226 - -/* - * CAN (Controller Area Network) frames, with a pseudo-header as supplied - * by Linux SocketCAN. See Documentation/networking/can.txt in the Linux - * source. - * - * Requested by Felix Obenhuber . - */ -#define DLT_CAN_SOCKETCAN 227 - -/* - * Raw IPv4/IPv6; different from DLT_RAW in that the DLT_ value specifies - * whether it's v4 or v6. Requested by Darren Reed . - */ -#define DLT_IPV4 228 -#define DLT_IPV6 229 - -/* - * IEEE 802.15.4, exactly as it appears in the spec (no padding, no - * nothing), and with no FCS at the end of the frame; requested by - * Jon Smirl . - */ -#define DLT_IEEE802_15_4_NOFCS 230 - -/* - * Raw D-Bus: - * - * http://www.freedesktop.org/wiki/Software/dbus - * - * messages: - * - * http://dbus.freedesktop.org/doc/dbus-specification.html#message-protocol-messages - * - * starting with the endianness flag, followed by the message type, etc., - * but without the authentication handshake before the message sequence: - * - * http://dbus.freedesktop.org/doc/dbus-specification.html#auth-protocol - * - * Requested by Martin Vidner . - */ -#define DLT_DBUS 231 - -/* - * Juniper-private data link type, as per request from - * Hannes Gredler . - */ -#define DLT_JUNIPER_VS 232 -#define DLT_JUNIPER_SRX_E2E 233 -#define DLT_JUNIPER_FIBRECHANNEL 234 - -/* - * DVB-CI (DVB Common Interface for communication between a PC Card - * module and a DVB receiver). See - * - * http://www.kaiser.cx/pcap-dvbci.html - * - * for the specification. - * - * Requested by Martin Kaiser . - */ -#define DLT_DVB_CI 235 - -/* - * Variant of 3GPP TS 27.010 multiplexing protocol (similar to, but - * *not* the same as, 27.010). Requested by Hans-Christoph Schemmel - * . - */ -#define DLT_MUX27010 236 - -/* - * STANAG 5066 D_PDUs. Requested by M. Baris Demiray - * . - */ -#define DLT_STANAG_5066_D_PDU 237 - -/* - * Juniper-private data link type, as per request from - * Hannes Gredler . 
- */ -#define DLT_JUNIPER_ATM_CEMIC 238 - -/* - * NetFilter LOG messages - * (payload of netlink NFNL_SUBSYS_ULOG/NFULNL_MSG_PACKET packets) - * - * Requested by Jakub Zawadzki - */ -#define DLT_NFLOG 239 - -/* - * Hilscher Gesellschaft fuer Systemautomation mbH link-layer type - * for Ethernet packets with a 4-byte pseudo-header and always - * with the payload including the FCS, as supplied by their - * netANALYZER hardware and software. - * - * Requested by Holger P. Frommer - */ -#define DLT_NETANALYZER 240 - -/* - * Hilscher Gesellschaft fuer Systemautomation mbH link-layer type - * for Ethernet packets with a 4-byte pseudo-header and FCS and - * with the Ethernet header preceded by 7 bytes of preamble and - * 1 byte of SFD, as supplied by their netANALYZER hardware and - * software. - * - * Requested by Holger P. Frommer - */ -#define DLT_NETANALYZER_TRANSPARENT 241 - -/* - * IP-over-InfiniBand, as specified by RFC 4391. - * - * Requested by Petr Sumbera . - */ -#define DLT_IPOIB 242 - -/* - * MPEG-2 transport stream (ISO 13818-1/ITU-T H.222.0). - * - * Requested by Guy Martin . - */ -#define DLT_MPEG_2_TS 243 - -/* - * ng4T GmbH's UMTS Iub/Iur-over-ATM and Iub/Iur-over-IP format as - * used by their ng40 protocol tester. - * - * Requested by Jens Grimmer . - */ -#define DLT_NG40 244 - -/* - * Pseudo-header giving adapter number and flags, followed by an NFC - * (Near-Field Communications) Logical Link Control Protocol (LLCP) PDU, - * as specified by NFC Forum Logical Link Control Protocol Technical - * Specification LLCP 1.1. - * - * Requested by Mike Wakerly . - */ -#define DLT_NFC_LLCP 245 - -/* - * 245 is used as LINKTYPE_PFSYNC; do not use it for any other purpose. - * - * DLT_PFSYNC has different values on different platforms, and all of - * them collide with something used elsewhere. On platforms that - * don't already define it, define it as 245. 
- */ -#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__) && !defined(__DragonFly__) && !defined(__APPLE__) -#define DLT_PFSYNC 246 -#endif - -/* - * Raw InfiniBand packets, starting with the Local Routing Header. - * - * Requested by Oren Kladnitsky . - */ -#define DLT_INFINIBAND 247 - -/* - * SCTP, with no lower-level protocols (i.e., no IPv4 or IPv6). - * - * Requested by Michael Tuexen . - */ -#define DLT_SCTP 248 - -/* - * USB packets, beginning with a USBPcap header. - * - * Requested by Tomasz Mon - */ -#define DLT_USBPCAP 249 - -/* - * Schweitzer Engineering Laboratories "RTAC" product serial-line - * packets. - * - * Requested by Chris Bontje . - */ -#define DLT_RTAC_SERIAL 250 - -/* - * Bluetooth Low Energy air interface link-layer packets. - * - * Requested by Mike Kershaw . - */ -#define DLT_BLUETOOTH_LE_LL 251 - -/* - * DLT type for upper-protocol layer PDU saves from wireshark. - * - * the actual contents are determined by two TAGs stored with each - * packet: - * EXP_PDU_TAG_LINKTYPE the link type (LINKTYPE_ value) of the - * original packet. - * - * EXP_PDU_TAG_PROTO_NAME the name of the wireshark dissector - * that can make sense of the data stored. - */ -#define DLT_WIRESHARK_UPPER_PDU 252 - -/* - * DLT type for the netlink protocol (nlmon devices). - */ -#define DLT_NETLINK 253 - -/* - * Bluetooth Linux Monitor headers for the BlueZ stack. - */ -#define DLT_BLUETOOTH_LINUX_MONITOR 254 - -/* - * Bluetooth Basic Rate/Enhanced Data Rate baseband packets, as - * captured by Ubertooth. - */ -#define DLT_BLUETOOTH_BREDR_BB 255 - -/* - * Bluetooth Low Energy link layer packets, as captured by Ubertooth. - */ -#define DLT_BLUETOOTH_LE_LL_WITH_PHDR 256 - -/* - * PROFIBUS data link layer. - */ -#define DLT_PROFIBUS_DL 257 - -/* - * Apple's DLT_PKTAP headers. 
- * - * Sadly, the folks at Apple either had no clue that the DLT_USERn values - * are for internal use within an organization and partners only, and - * didn't know that the right way to get a link-layer header type is to - * ask tcpdump.org for one, or knew and didn't care, so they just - * used DLT_USER2, which causes problems for everything except for - * their version of tcpdump. - * - * So I'll just give them one; hopefully this will show up in a - * libpcap release in time for them to get this into 10.10 Big Sur - * or whatever Mavericks' successor is called. LINKTYPE_PKTAP - * will be 258 *even on OS X*; that is *intentional*, so that - * PKTAP files look the same on *all* OSes (different OSes can have - * different numerical values for a given DLT_, but *MUST NOT* have - * different values for what goes in a file, as files can be moved - * between OSes!). - * - * When capturing, on a system with a Darwin-based OS, on a device - * that returns 149 (DLT_USER2 and Apple's DLT_PKTAP) with this - * version of libpcap, the DLT_ value for the pcap_t will be DLT_PKTAP, - * and that will continue to be DLT_USER2 on Darwin-based OSes. That way, - * binary compatibility with Mavericks is preserved for programs using - * this version of libpcap. This does mean that if you were using - * DLT_USER2 for some capture device on OS X, you can't do so with - * this version of libpcap, just as you can't with Apple's libpcap - - * on OS X, they define DLT_PKTAP to be DLT_USER2, so programs won't - * be able to distinguish between PKTAP and whatever you were using - * DLT_USER2 for. - * - * If the program saves the capture to a file using this version of - * libpcap's pcap_dump code, the LINKTYPE_ value in the file will be - * LINKTYPE_PKTAP, which will be 258, even on Darwin-based OSes. - * That way, the file will *not* be a DLT_USER2 file. 
That means - * that the latest version of tcpdump, when built with this version - * of libpcap, and sufficiently recent versions of Wireshark will - * be able to read those files and interpret them correctly; however, - * Apple's version of tcpdump in OS X 10.9 won't be able to handle - * them. (Hopefully, Apple will pick up this version of libpcap, - * and the corresponding version of tcpdump, so that tcpdump will - * be able to handle the old LINKTYPE_USER2 captures *and* the new - * LINKTYPE_PKTAP captures.) - */ -#ifdef __APPLE__ -#define DLT_PKTAP DLT_USER2 -#else -#define DLT_PKTAP 258 -#endif - -/* - * Ethernet packets preceded by a header giving the last 6 octets - * of the preamble specified by 802.3-2012 Clause 65, section - * 65.1.3.2 "Transmit". - */ -#define DLT_EPON 259 - -/* - * IPMI trace packets, as specified by Table 3-20 "Trace Data Block Format" - * in the PICMG HPM.2 specification. - */ -#define DLT_IPMI_HPM_2 260 - -#define DLT_MATCHING_MAX 260 /* highest value in the "matching" range */ - -/* - * DLT and savefile link type values are split into a class and - * a member of that class. A class value of 0 indicates a regular - * DLT_/LINKTYPE_ value. - */ -#define DLT_CLASS(x) ((x) & 0x03ff0000) +/* Pull in data-link level type codes. */ +#include <net/dlt.h> /* * The instruction encodings. diff --git a/freebsd/sys/net/bpf_buffer.c b/freebsd/sys/net/bpf_buffer.c index d42df1b0..0bfb9c06 100644 --- a/freebsd/sys/net/bpf_buffer.c +++ b/freebsd/sys/net/bpf_buffer.c @@ -44,7 +44,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission.
* diff --git a/freebsd/sys/net/bpf_filter.c b/freebsd/sys/net/bpf_filter.c index ecfb3d14..9e5fbb0f 100644 --- a/freebsd/sys/net/bpf_filter.c +++ b/freebsd/sys/net/bpf_filter.c @@ -17,7 +17,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/net/bpfdesc.h b/freebsd/sys/net/bpfdesc.h index 60ed11f9..7c13f375 100644 --- a/freebsd/sys/net/bpfdesc.h +++ b/freebsd/sys/net/bpfdesc.h @@ -15,7 +15,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/net/dlt.h b/freebsd/sys/net/dlt.h new file mode 100644 index 00000000..dc818521 --- /dev/null +++ b/freebsd/sys/net/dlt.h @@ -0,0 +1,1338 @@ +/*- + * Copyright (c) 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from the Stanford/CMU enet packet filter, + * (net/enet.c) distributed as part of 4.3BSD, and code contributed + * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence + * Berkeley Laboratory. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)bpf.h 7.1 (Berkeley) 5/7/91 + * + * $FreeBSD$ + */ + +#ifndef _NET_DLT_H_ +#define _NET_DLT_H_ + +/* + * Link-layer header type codes. + * + * Do *NOT* add new values to this list without asking + * "tcpdump-workers@lists.tcpdump.org" for a value. 
Otherwise, you run + * the risk of using a value that's already being used for some other + * purpose, and of having tools that read libpcap-format captures not + * being able to handle captures with your new DLT_ value, with no hope + * that they will ever be changed to do so (as that would destroy their + * ability to read captures using that value for that other purpose). + * + * See + * + * http://www.tcpdump.org/linktypes.html + * + * for detailed descriptions of some of these link-layer header types. + */ + +/* + * These are the types that are the same on all platforms, and that + * have been defined by <net/bpf.h> for ages. + */ +#define DLT_NULL 0 /* BSD loopback encapsulation */ +#define DLT_EN10MB 1 /* Ethernet (10Mb) */ +#define DLT_EN3MB 2 /* Experimental Ethernet (3Mb) */ +#define DLT_AX25 3 /* Amateur Radio AX.25 */ +#define DLT_PRONET 4 /* Proteon ProNET Token Ring */ +#define DLT_CHAOS 5 /* Chaos */ +#define DLT_IEEE802 6 /* 802.5 Token Ring */ +#define DLT_ARCNET 7 /* ARCNET, with BSD-style header */ +#define DLT_SLIP 8 /* Serial Line IP */ +#define DLT_PPP 9 /* Point-to-point Protocol */ +#define DLT_FDDI 10 /* FDDI */ + +/* + * These are types that are different on some platforms, and that + * have been defined by <net/bpf.h> for ages. We use #ifdefs to + * detect the BSDs that define them differently from the traditional + * libpcap <net/bpf.h> + * + * XXX - DLT_ATM_RFC1483 is 13 in BSD/OS, and DLT_RAW is 14 in BSD/OS, + * but I don't know what the right #define is for BSD/OS. + */ +#define DLT_ATM_RFC1483 11 /* LLC-encapsulated ATM */ + +#ifdef __OpenBSD__ +#define DLT_RAW 14 /* raw IP */ +#else +#define DLT_RAW 12 /* raw IP */ +#endif + +/* + * Given that the only OS that currently generates BSD/OS SLIP or PPP + * is, well, BSD/OS, arguably everybody should have chosen its values + * for DLT_SLIP_BSDOS and DLT_PPP_BSDOS, which are 15 and 16, but they + * didn't. So it goes.
+ */ +#if defined(__NetBSD__) || defined(__FreeBSD__) +#ifndef DLT_SLIP_BSDOS +#define DLT_SLIP_BSDOS 13 /* BSD/OS Serial Line IP */ +#define DLT_PPP_BSDOS 14 /* BSD/OS Point-to-point Protocol */ +#endif +#else +#define DLT_SLIP_BSDOS 15 /* BSD/OS Serial Line IP */ +#define DLT_PPP_BSDOS 16 /* BSD/OS Point-to-point Protocol */ +#endif + +/* + * 17 was used for DLT_PFLOG in OpenBSD; it no longer is. + * + * It was DLT_LANE8023 in SuSE 6.3, so we defined LINKTYPE_PFLOG + * as 117 so that pflog captures would use a link-layer header type + * value that didn't collide with any other values. On all + * platforms other than OpenBSD, we defined DLT_PFLOG as 117, + * and we mapped between LINKTYPE_PFLOG and DLT_PFLOG. + * + * OpenBSD eventually switched to using 117 for DLT_PFLOG as well. + * + * Don't use 17 for anything else. + */ + +/* + * 18 is used for DLT_PFSYNC in OpenBSD, NetBSD, DragonFly BSD and + * Mac OS X; don't use it for anything else. (FreeBSD uses 121, + * which collides with DLT_HHDLC, even though it doesn't use 18 + * for anything and doesn't appear to have ever used it for anything.) + * + * We define it as 18 on those platforms; it is, unfortunately, used + * for DLT_CIP in Suse 6.3, so we don't define it as DLT_PFSYNC + * in general. As the packet format for it, like that for + * DLT_PFLOG, is not only OS-dependent but OS-version-dependent, + * we don't support printing it in tcpdump except on OSes that + * have the relevant header files, so it's not that useful on + * other platforms. + */ +#if defined(__OpenBSD__) || defined(__NetBSD__) || defined(__DragonFly__) || defined(__APPLE__) +#define DLT_PFSYNC 18 +#endif + +#define DLT_ATM_CLIP 19 /* Linux Classical-IP over ATM */ + +/* + * Apparently Redback uses this for its SmartEdge 400/800. I hope + * nobody else decided to use it, too. 
+ */ +#define DLT_REDBACK_SMARTEDGE 32 + +/* + * These values are defined by NetBSD; other platforms should refrain from + * using them for other purposes, so that NetBSD savefiles with link + * types of 50 or 51 can be read as this type on all platforms. + */ +#define DLT_PPP_SERIAL 50 /* PPP over serial with HDLC encapsulation */ +#define DLT_PPP_ETHER 51 /* PPP over Ethernet */ + +/* + * The Axent Raptor firewall - now the Symantec Enterprise Firewall - uses + * a link-layer type of 99 for the tcpdump it supplies. The link-layer + * header has 6 bytes of unknown data, something that appears to be an + * Ethernet type, and 36 bytes that appear to be 0 in at least one capture + * I've seen. + */ +#define DLT_SYMANTEC_FIREWALL 99 + +/* + * Values between 100 and 103 are used in capture file headers as + * link-layer header type LINKTYPE_ values corresponding to DLT_ types + * that differ between platforms; don't use those values for new DLT_ + * new types. + */ + +/* + * Values starting with 104 are used for newly-assigned link-layer + * header type values; for those link-layer header types, the DLT_ + * value returned by pcap_datalink() and passed to pcap_open_dead(), + * and the LINKTYPE_ value that appears in capture files, are the + * same. + * + * DLT_MATCHING_MIN is the lowest such value; DLT_MATCHING_MAX is + * the highest such value. + */ +#define DLT_MATCHING_MIN 104 + +/* + * This value was defined by libpcap 0.5; platforms that have defined + * it with a different value should define it here with that value - + * a link type of 104 in a save file will be mapped to DLT_C_HDLC, + * whatever value that happens to be, so programs will correctly + * handle files with that link type regardless of the value of + * DLT_C_HDLC. + * + * The name DLT_C_HDLC was used by BSD/OS; we use that name for source + * compatibility with programs written for BSD/OS. 
+ * + * libpcap 0.5 defined it as DLT_CHDLC; we define DLT_CHDLC as well, + * for source compatibility with programs written for libpcap 0.5. + */ +#define DLT_C_HDLC 104 /* Cisco HDLC */ +#define DLT_CHDLC DLT_C_HDLC + +#define DLT_IEEE802_11 105 /* IEEE 802.11 wireless */ + +/* + * 106 is reserved for Linux Classical IP over ATM; it's like DLT_RAW, + * except when it isn't. (I.e., sometimes it's just raw IP, and + * sometimes it isn't.) We currently handle it as DLT_LINUX_SLL, + * so that we don't have to worry about the link-layer header.) + */ + +/* + * Frame Relay; BSD/OS has a DLT_FR with a value of 11, but that collides + * with other values. + * DLT_FR and DLT_FRELAY packets start with the Q.922 Frame Relay header + * (DLCI, etc.). + */ +#define DLT_FRELAY 107 + +/* + * OpenBSD DLT_LOOP, for loopback devices; it's like DLT_NULL, except + * that the AF_ type in the link-layer header is in network byte order. + * + * DLT_LOOP is 12 in OpenBSD, but that's DLT_RAW in other OSes, so + * we don't use 12 for it in OSes other than OpenBSD. + */ +#ifdef __OpenBSD__ +#define DLT_LOOP 12 +#else +#define DLT_LOOP 108 +#endif + +/* + * Encapsulated packets for IPsec; DLT_ENC is 13 in OpenBSD, but that's + * DLT_SLIP_BSDOS in NetBSD, so we don't use 13 for it in OSes other + * than OpenBSD. + */ +#ifdef __OpenBSD__ +#define DLT_ENC 13 +#else +#define DLT_ENC 109 +#endif + +/* + * Values between 110 and 112 are reserved for use in capture file headers + * as link-layer types corresponding to DLT_ types that might differ + * between platforms; don't use those values for new DLT_ types + * other than the corresponding DLT_ types. + */ + +/* + * This is for Linux cooked sockets. + */ +#define DLT_LINUX_SLL 113 + +/* + * Apple LocalTalk hardware. + */ +#define DLT_LTALK 114 + +/* + * Acorn Econet. + */ +#define DLT_ECONET 115 + +/* + * Reserved for use with OpenBSD ipfilter. + */ +#define DLT_IPFILTER 116 + +/* + * OpenBSD DLT_PFLOG. 
+ */ +#define DLT_PFLOG 117 + +/* + * Registered for Cisco-internal use. + */ +#define DLT_CISCO_IOS 118 + +/* + * For 802.11 cards using the Prism II chips, with a link-layer + * header including Prism monitor mode information plus an 802.11 + * header. + */ +#define DLT_PRISM_HEADER 119 + +/* + * Reserved for Aironet 802.11 cards, with an Aironet link-layer header + * (see Doug Ambrisko's FreeBSD patches). + */ +#define DLT_AIRONET_HEADER 120 + +/* + * Sigh. + * + * 121 was reserved for Siemens HiPath HDLC on 2002-01-25, as + * requested by Tomas Kukosa. + * + * On 2004-02-25, a FreeBSD checkin to sys/net/bpf.h was made that + * assigned 121 as DLT_PFSYNC. In current versions, its libpcap + * does DLT_ <-> LINKTYPE_ mapping, mapping DLT_PFSYNC to a + * LINKTYPE_PFSYNC value of 246, so it should write out DLT_PFSYNC + * dump files with 246 as the link-layer header type. (Earlier + * versions might not have done mapping, in which case they would + * have written them out with a link-layer header type of 121.) + * + * OpenBSD, from which pf came, however, uses 18 for DLT_PFSYNC; + * its libpcap does no DLT_ <-> LINKTYPE_ mapping, so it would + * write out DLT_PFSYNC dump files with use 18 as the link-layer + * header type. + * + * NetBSD, DragonFly BSD, and Darwin also use 18 for DLT_PFSYNC; in + * current versions, their libpcaps do DLT_ <-> LINKTYPE_ mapping, + * mapping DLT_PFSYNC to a LINKTYPE_PFSYNC value of 246, so they + * should write out DLT_PFSYNC dump files with 246 as the link-layer + * header type. (Earlier versions might not have done mapping, + * in which case they'd work the same way OpenBSD does, writing + * them out with a link-layer header type of 18.) + * + * We'll define DLT_PFSYNC as: + * + * 18 on NetBSD, OpenBSD, DragonFly BSD, and Darwin; + * + * 121 on FreeBSD; + * + * 246 everywhere else. 
+ * + * We'll define DLT_HHDLC as 121 on everything except for FreeBSD; + * anybody who wants to compile, on FreeBSD, code that uses DLT_HHDLC + * is out of luck. + * + * We'll define LINKTYPE_PFSYNC as 246 on *all* platforms, so that + * savefiles written using *this* code won't use 18 or 121 for PFSYNC, + * they'll all use 246. + * + * Code that uses pcap_datalink() to determine the link-layer header + * type of a savefile won't, when built and run on FreeBSD, be able + * to distinguish between LINKTYPE_PFSYNC and LINKTYPE_HHDLC capture + * files, as pcap_datalink() will give 121 for both of them. Code + * that doesn't, such as the code in Wireshark, will be able to + * distinguish between them. + * + * FreeBSD's libpcap won't map a link-layer header type of 18 - i.e., + * DLT_PFSYNC files from OpenBSD and possibly older versions of NetBSD, + * DragonFly BSD, and OS X - to DLT_PFSYNC, so code built with FreeBSD's + * libpcap won't treat those files as DLT_PFSYNC files. + * + * Other libpcaps won't map a link-layer header type of 121 to DLT_PFSYNC; + * this means they can read DLT_HHDLC files, if any exist, but won't + * treat pcap files written by any older versions of FreeBSD libpcap that + * didn't map to 246 as DLT_PFSYNC files. + */ +#ifdef __FreeBSD__ +#define DLT_PFSYNC 121 +#else +#define DLT_HHDLC 121 +#endif + +/* + * This is for RFC 2625 IP-over-Fibre Channel. + * + * This is not for use with raw Fibre Channel, where the link-layer + * header starts with a Fibre Channel frame header; it's for IP-over-FC, + * where the link-layer header starts with an RFC 2625 Network_Header + * field. + */ +#define DLT_IP_OVER_FC 122 + +/* + * This is for Full Frontal ATM on Solaris with SunATM, with a + * pseudo-header followed by an AALn PDU. + * + * There may be other forms of Full Frontal ATM on other OSes, + * with different pseudo-headers. + * + * If ATM software returns a pseudo-header with VPI/VCI information + * (and, ideally, packet type information, e.g. 
signalling, ILMI, + * LANE, LLC-multiplexed traffic, etc.), it should not use + * DLT_ATM_RFC1483, but should get a new DLT_ value, so tcpdump + * and the like don't have to infer the presence or absence of a + * pseudo-header and the form of the pseudo-header. + */ +#define DLT_SUNATM 123 /* Solaris+SunATM */ + +/* + * Reserved as per request from Kent Dahlgren + * for private use. + */ +#define DLT_RIO 124 /* RapidIO */ +#define DLT_PCI_EXP 125 /* PCI Express */ +#define DLT_AURORA 126 /* Xilinx Aurora link layer */ + +/* + * Header for 802.11 plus a number of bits of link-layer information + * including radio information, used by some recent BSD drivers as + * well as the madwifi Atheros driver for Linux. + */ +#define DLT_IEEE802_11_RADIO 127 /* 802.11 plus radiotap radio header */ + +/* + * Reserved for the TZSP encapsulation, as per request from + * Chris Waters + * TZSP is a generic encapsulation for any other link type, + * which includes a means to include meta-information + * with the packet, e.g. signal strength and channel + * for 802.11 packets. + */ +#define DLT_TZSP 128 /* Tazmen Sniffer Protocol */ + +/* + * BSD's ARCNET headers have the source host, destination host, + * and type at the beginning of the packet; that's what's handed + * up to userland via BPF. + * + * Linux's ARCNET headers, however, have a 2-byte offset field + * between the host IDs and the type; that's what's handed up + * to userland via PF_PACKET sockets. + * + * We therefore have to have separate DLT_ values for them. + */ +#define DLT_ARCNET_LINUX 129 /* ARCNET */ + +/* + * Juniper-private data link types, as per request from + * Hannes Gredler . The DLT_s are used + * for passing on chassis-internal metainformation such as + * QOS profiles, etc.. 
+ */ +#define DLT_JUNIPER_MLPPP 130 +#define DLT_JUNIPER_MLFR 131 +#define DLT_JUNIPER_ES 132 +#define DLT_JUNIPER_GGSN 133 +#define DLT_JUNIPER_MFR 134 +#define DLT_JUNIPER_ATM2 135 +#define DLT_JUNIPER_SERVICES 136 +#define DLT_JUNIPER_ATM1 137 + +/* + * Apple IP-over-IEEE 1394, as per a request from Dieter Siegmund + * . The header that's presented is an Ethernet-like + * header: + * + * #define FIREWIRE_EUI64_LEN 8 + * struct firewire_header { + * u_char firewire_dhost[FIREWIRE_EUI64_LEN]; + * u_char firewire_shost[FIREWIRE_EUI64_LEN]; + * u_short firewire_type; + * }; + * + * with "firewire_type" being an Ethernet type value, rather than, + * for example, raw GASP frames being handed up. + */ +#define DLT_APPLE_IP_OVER_IEEE1394 138 + +/* + * Various SS7 encapsulations, as per a request from Jeff Morriss + * and subsequent discussions. + */ +#define DLT_MTP2_WITH_PHDR 139 /* pseudo-header with various info, followed by MTP2 */ +#define DLT_MTP2 140 /* MTP2, without pseudo-header */ +#define DLT_MTP3 141 /* MTP3, without pseudo-header or MTP2 */ +#define DLT_SCCP 142 /* SCCP, without pseudo-header or MTP2 or MTP3 */ + +/* + * DOCSIS MAC frames. + */ +#define DLT_DOCSIS 143 + +/* + * Linux-IrDA packets. Protocol defined at http://www.irda.org. + * Those packets include IrLAP headers and above (IrLMP...), but + * don't include Phy framing (SOF/EOF/CRC & byte stuffing), because Phy + * framing can be handled by the hardware and depends on the bitrate. + * This is exactly the format you would get capturing on a Linux-IrDA + * interface (irdaX), but not on a raw serial port. + * Note the capture is done in "Linux-cooked" mode, so each packet includes + * a fake packet header (struct sll_header). This is because IrDA packet + * decoding is dependent on the direction of the packet (incoming or + * outgoing). + * When/if other platforms implement IrDA capture, we may revisit the + * issue and define a real DLT_IRDA...
+ * Jean II + */ +#define DLT_LINUX_IRDA 144 + +/* + * Reserved for IBM SP switch and IBM Next Federation switch. + */ +#define DLT_IBM_SP 145 +#define DLT_IBM_SN 146 + +/* + * Reserved for private use. If you have some link-layer header type + * that you want to use within your organization, with the capture files + * using that link-layer header type not ever be sent outside your + * organization, you can use these values. + * + * No libpcap release will use these for any purpose, nor will any + * tcpdump release use them, either. + * + * Do *NOT* use these in capture files that you expect anybody not using + * your private versions of capture-file-reading tools to read; in + * particular, do *NOT* use them in products, otherwise you may find that + * people won't be able to use tcpdump, or snort, or Ethereal, or... to + * read capture files from your firewall/intrusion detection/traffic + * monitoring/etc. appliance, or whatever product uses that DLT_ value, + * and you may also find that the developers of those applications will + * not accept patches to let them read those files. + * + * Also, do not use them if somebody might send you a capture using them + * for *their* private type and tools using them for *your* private type + * would have to read them. + * + * Instead, ask "tcpdump-workers@lists.tcpdump.org" for a new DLT_ value, + * as per the comment above, and use the type you're given. 
+ */ +#define DLT_USER0 147 +#define DLT_USER1 148 +#define DLT_USER2 149 +#define DLT_USER3 150 +#define DLT_USER4 151 +#define DLT_USER5 152 +#define DLT_USER6 153 +#define DLT_USER7 154 +#define DLT_USER8 155 +#define DLT_USER9 156 +#define DLT_USER10 157 +#define DLT_USER11 158 +#define DLT_USER12 159 +#define DLT_USER13 160 +#define DLT_USER14 161 +#define DLT_USER15 162 + +/* + * For future use with 802.11 captures - defined by AbsoluteValue + * Systems to store a number of bits of link-layer information + * including radio information: + * + * http://www.shaftnet.org/~pizza/software/capturefrm.txt + * + * but it might be used by some non-AVS drivers now or in the + * future. + */ +#define DLT_IEEE802_11_RADIO_AVS 163 /* 802.11 plus AVS radio header */ + +/* + * Juniper-private data link type, as per request from + * Hannes Gredler . The DLT_s are used + * for passing on chassis-internal metainformation such as + * QOS profiles, etc.. + */ +#define DLT_JUNIPER_MONITOR 164 + +/* + * BACnet MS/TP frames. + */ +#define DLT_BACNET_MS_TP 165 + +/* + * Another PPP variant as per request from Karsten Keil . + * + * This is used in some OSes to allow a kernel socket filter to distinguish + * between incoming and outgoing packets, on a socket intended to + * supply pppd with outgoing packets so it can do dial-on-demand and + * hangup-on-lack-of-demand; incoming packets are filtered out so they + * don't cause pppd to hold the connection up (you don't want random + * input packets such as port scans, packets from old lost connections, + * etc. to force the connection to stay up). + * + * The first byte of the PPP header (0xff03) is modified to accommodate + * the direction - 0x00 = IN, 0x01 = OUT. + */ +#define DLT_PPP_PPPD 166 + +/* + * Names for backwards compatibility with older versions of some PPP + * software; new software should use DLT_PPP_PPPD.
+ */ +#define DLT_PPP_WITH_DIRECTION DLT_PPP_PPPD +#define DLT_LINUX_PPP_WITHDIRECTION DLT_PPP_PPPD + +/* + * Juniper-private data link type, as per request from + * Hannes Gredler . The DLT_s are used + * for passing on chassis-internal metainformation such as + * QOS profiles, cookies, etc.. + */ +#define DLT_JUNIPER_PPPOE 167 +#define DLT_JUNIPER_PPPOE_ATM 168 + +#define DLT_GPRS_LLC 169 /* GPRS LLC */ +#define DLT_GPF_T 170 /* GPF-T (ITU-T G.7041/Y.1303) */ +#define DLT_GPF_F 171 /* GPF-F (ITU-T G.7041/Y.1303) */ + +/* + * Requested by Oolan Zimmer for use in Gcom's T1/E1 line + * monitoring equipment. + */ +#define DLT_GCOM_T1E1 172 +#define DLT_GCOM_SERIAL 173 + +/* + * Juniper-private data link type, as per request from + * Hannes Gredler . The DLT_ is used + * for internal communication to Physical Interface Cards (PIC) + */ +#define DLT_JUNIPER_PIC_PEER 174 + +/* + * Link types requested by Gregor Maier of Endace + * Measurement Systems. They add an ERF header (see + * http://www.endace.com/support/EndaceRecordFormat.pdf) in front of + * the link-layer header. + */ +#define DLT_ERF_ETH 175 /* Ethernet */ +#define DLT_ERF_POS 176 /* Packet-over-SONET */ + +/* + * Requested by Daniele Orlandi for raw LAPD + * for vISDN (http://www.orlandi.com/visdn/). Its link-layer header + * includes additional information before the LAPD header, so it's + * not necessarily a generic LAPD header. + */ +#define DLT_LINUX_LAPD 177 + +/* + * Juniper-private data link type, as per request from + * Hannes Gredler . + * The DLT_ are used for prepending meta-information + * like interface index, interface name + * before standard Ethernet, PPP, Frelay & C-HDLC Frames + */ +#define DLT_JUNIPER_ETHER 178 +#define DLT_JUNIPER_PPP 179 +#define DLT_JUNIPER_FRELAY 180 +#define DLT_JUNIPER_CHDLC 181 + +/* + * Multi Link Frame Relay (FRF.16) + */ +#define DLT_MFR 182 + +/* + * Juniper-private data link type, as per request from + * Hannes Gredler . 
+ * The DLT_ is used for internal communication with a + * voice Adapter Card (PIC) + */ +#define DLT_JUNIPER_VP 183 + +/* + * Arinc 429 frames. + * DLT_ requested by Gianluca Varenni . + * Every frame contains a 32bit A429 label. + * More documentation on Arinc 429 can be found at + * http://www.condoreng.com/support/downloads/tutorials/ARINCTutorial.pdf + */ +#define DLT_A429 184 + +/* + * Arinc 653 Interpartition Communication messages. + * DLT_ requested by Gianluca Varenni . + * Please refer to the A653-1 standard for more information. + */ +#define DLT_A653_ICM 185 + +/* + * This used to be "USB packets, beginning with a USB setup header; + * requested by Paolo Abeni ." + * + * However, that header didn't work all that well - it left out some + * useful information - and was abandoned in favor of the DLT_USB_LINUX + * header. + * + * This is now used by FreeBSD for its BPF taps for USB; that has its + * own headers. So it is written, so it is done. + * + * For source-code compatibility, we also define DLT_USB to have this + * value. We do it numerically so that, if code that includes this + * file (directly or indirectly) also includes an OS header that also + * defines DLT_USB as 186, we don't get a redefinition warning. + * (NetBSD 7 does that.) + */ +#define DLT_USB_FREEBSD 186 +#define DLT_USB 186 + +/* + * Bluetooth HCI UART transport layer (part H:4); requested by + * Paolo Abeni. + */ +#define DLT_BLUETOOTH_HCI_H4 187 + +/* + * IEEE 802.16 MAC Common Part Sublayer; requested by Maria Cruz + * . + */ +#define DLT_IEEE802_16_MAC_CPS 188 + +/* + * USB packets, beginning with a Linux USB header; requested by + * Paolo Abeni . + */ +#define DLT_USB_LINUX 189 + +/* + * Controller Area Network (CAN) v. 2.0B packets. + * DLT_ requested by Gianluca Varenni . + * Used to dump CAN packets coming from a CAN Vector board. 
+ * More documentation on the CAN v2.0B frames can be found at + * http://www.can-cia.org/downloads/?269 + */ +#define DLT_CAN20B 190 + +/* + * IEEE 802.15.4, with address fields padded, as is done by Linux + * drivers; requested by Juergen Schimmer. + */ +#define DLT_IEEE802_15_4_LINUX 191 + +/* + * Per Packet Information encapsulated packets. + * DLT_ requested by Gianluca Varenni . + */ +#define DLT_PPI 192 + +/* + * Header for 802.16 MAC Common Part Sublayer plus a radiotap radio header; + * requested by Charles Clancy. + */ +#define DLT_IEEE802_16_MAC_CPS_RADIO 193 + +/* + * Juniper-private data link type, as per request from + * Hannes Gredler . + * The DLT_ is used for internal communication with a + * integrated service module (ISM). + */ +#define DLT_JUNIPER_ISM 194 + +/* + * IEEE 802.15.4, exactly as it appears in the spec (no padding, no + * nothing); requested by Mikko Saarnivala . + * For this one, we expect the FCS to be present at the end of the frame; + * if the frame has no FCS, DLT_IEEE802_15_4_NOFCS should be used. + */ +#define DLT_IEEE802_15_4 195 + +/* + * Various link-layer types, with a pseudo-header, for SITA + * (http://www.sita.aero/); requested by Fulko Hew (fulko.hew@gmail.com). + */ +#define DLT_SITA 196 + +/* + * Various link-layer types, with a pseudo-header, for Endace DAG cards; + * encapsulates Endace ERF records. Requested by Stephen Donnelly + * . + */ +#define DLT_ERF 197 + +/* + * Special header prepended to Ethernet packets when capturing from a + * u10 Networks board. Requested by Phil Mulholland + * . + */ +#define DLT_RAIF1 198 + +/* + * IPMB packet for IPMI, beginning with the I2C slave address, followed + * by the netFn and LUN, etc.. Requested by Chanthy Toeung + * . + */ +#define DLT_IPMB 199 + +/* + * Juniper-private data link type, as per request from + * Hannes Gredler . + * The DLT_ is used for capturing data on a secure tunnel interface. 
+ */ +#define DLT_JUNIPER_ST 200 + +/* + * Bluetooth HCI UART transport layer (part H:4), with pseudo-header + * that includes direction information; requested by Paolo Abeni. + */ +#define DLT_BLUETOOTH_HCI_H4_WITH_PHDR 201 + +/* + * AX.25 packet with a 1-byte KISS header; see + * + * http://www.ax25.net/kiss.htm + * + * as per Richard Stearn . + */ +#define DLT_AX25_KISS 202 + +/* + * LAPD packets from an ISDN channel, starting with the address field, + * with no pseudo-header. + * Requested by Varuna De Silva . + */ +#define DLT_LAPD 203 + +/* + * Variants of various link-layer headers, with a one-byte direction + * pseudo-header prepended - zero means "received by this host", + * non-zero (any non-zero value) means "sent by this host" - as per + * Will Barker . + */ +#define DLT_PPP_WITH_DIR 204 /* PPP - don't confuse with DLT_PPP_WITH_DIRECTION */ +#define DLT_C_HDLC_WITH_DIR 205 /* Cisco HDLC */ +#define DLT_FRELAY_WITH_DIR 206 /* Frame Relay */ +#define DLT_LAPB_WITH_DIR 207 /* LAPB */ + +/* + * 208 is reserved for an as-yet-unspecified proprietary link-layer + * type, as requested by Will Barker. + */ + +/* + * IPMB with a Linux-specific pseudo-header; as requested by Alexey Neyman + * . + */ +#define DLT_IPMB_LINUX 209 + +/* + * FlexRay automotive bus - http://www.flexray.com/ - as requested + * by Hannes Kaelber . + */ +#define DLT_FLEXRAY 210 + +/* + * Media Oriented Systems Transport (MOST) bus for multimedia + * transport - http://www.mostcooperation.com/ - as requested + * by Hannes Kaelber . + */ +#define DLT_MOST 211 + +/* + * Local Interconnect Network (LIN) bus for vehicle networks - + * http://www.lin-subbus.org/ - as requested by Hannes Kaelber + * . + */ +#define DLT_LIN 212 + +/* + * X2E-private data link type used for serial line capture, + * as requested by Hannes Kaelber . + */ +#define DLT_X2E_SERIAL 213 + +/* + * X2E-private data link type used for the Xoraya data logger + * family, as requested by Hannes Kaelber . 
+ */ +#define DLT_X2E_XORAYA 214 + +/* + * IEEE 802.15.4, exactly as it appears in the spec (no padding, no + * nothing), but with the PHY-level data for non-ASK PHYs (4 octets + * of 0 as preamble, one octet of SFD, one octet of frame length+ + * reserved bit, and then the MAC-layer data, starting with the + * frame control field). + * + * Requested by Max Filippov . + */ +#define DLT_IEEE802_15_4_NONASK_PHY 215 + +/* + * David Gibson requested this for + * captures from the Linux kernel /dev/input/eventN devices. This + * is used to communicate keystrokes and mouse movements from the + * Linux kernel to display systems, such as Xorg. + */ +#define DLT_LINUX_EVDEV 216 + +/* + * GSM Um and Abis interfaces, preceded by a "gsmtap" header. + * + * Requested by Harald Welte . + */ +#define DLT_GSMTAP_UM 217 +#define DLT_GSMTAP_ABIS 218 + +/* + * MPLS, with an MPLS label as the link-layer header. + * Requested by Michele Marchetto on behalf + * of OpenBSD. + */ +#define DLT_MPLS 219 + +/* + * USB packets, beginning with a Linux USB header, with the USB header + * padded to 64 bytes; required for memory-mapped access. + */ +#define DLT_USB_LINUX_MMAPPED 220 + +/* + * DECT packets, with a pseudo-header; requested by + * Matthias Wenzel . + */ +#define DLT_DECT 221 + +/* + * From: "Lidwa, Eric (GSFC-582.0)[SGT INC]" + * Date: Mon, 11 May 2009 11:18:30 -0500 + * + * DLT_AOS. We need it for AOS Space Data Link Protocol. + * I have already written dissectors for but need an OK from + * legal before I can submit a patch. + * + */ +#define DLT_AOS 222 + +/* + * Wireless HART (Highway Addressable Remote Transducer) + * From the HART Communication Foundation + * IES/PAS 62591 + * + * Requested by Sam Roberts . + */ +#define DLT_WIHART 223 + +/* + * Fibre Channel FC-2 frames, beginning with a Frame_Header. + * Requested by Kahou Lei . + */ +#define DLT_FC_2 224 + +/* + * Fibre Channel FC-2 frames, beginning with an encoding of the + * SOF, and ending with an encoding of the EOF. 
+ * + * The encodings represent the frame delimiters as 4-byte sequences + * representing the corresponding ordered sets, with K28.5 + * represented as 0xBC, and the D symbols as the corresponding + * byte values; for example, SOFi2, which is K28.5 - D21.5 - D1.2 - D21.2, + * is represented as 0xBC 0xB5 0x55 0x55. + * + * Requested by Kahou Lei . + */ +#define DLT_FC_2_WITH_FRAME_DELIMS 225 + +/* + * Solaris ipnet pseudo-header; requested by Darren Reed . + * + * The pseudo-header starts with a one-byte version number; for version 2, + * the pseudo-header is: + * + * struct dl_ipnetinfo { + * u_int8_t dli_version; + * u_int8_t dli_family; + * u_int16_t dli_htype; + * u_int32_t dli_pktlen; + * u_int32_t dli_ifindex; + * u_int32_t dli_grifindex; + * u_int32_t dli_zsrc; + * u_int32_t dli_zdst; + * }; + * + * dli_version is 2 for the current version of the pseudo-header. + * + * dli_family is a Solaris address family value, so it's 2 for IPv4 + * and 26 for IPv6. + * + * dli_htype is a "hook type" - 0 for incoming packets, 1 for outgoing + * packets, and 2 for packets arriving from another zone on the same + * machine. + * + * dli_pktlen is the length of the packet data following the pseudo-header + * (so the captured length minus dli_pktlen is the length of the + * pseudo-header, assuming the entire pseudo-header was captured). + * + * dli_ifindex is the interface index of the interface on which the + * packet arrived. + * + * dli_grifindex is the group interface index number (for IPMP interfaces). + * + * dli_zsrc is the zone identifier for the source of the packet. + * + * dli_zdst is the zone identifier for the destination of the packet. + * + * A zone number of 0 is the global zone; a zone number of 0xffffffff + * means that the packet arrived from another host on the network, not + * from another zone on the same machine. + * + * An IPv4 or IPv6 datagram follows the pseudo-header; dli_family indicates + * which of those it is. 
+ */ +#define DLT_IPNET 226 + +/* + * CAN (Controller Area Network) frames, with a pseudo-header as supplied + * by Linux SocketCAN, and with multi-byte numerical fields in that header + * in big-endian byte order. + * + * See Documentation/networking/can.txt in the Linux source. + * + * Requested by Felix Obenhuber . + */ +#define DLT_CAN_SOCKETCAN 227 + +/* + * Raw IPv4/IPv6; different from DLT_RAW in that the DLT_ value specifies + * whether it's v4 or v6. Requested by Darren Reed . + */ +#define DLT_IPV4 228 +#define DLT_IPV6 229 + +/* + * IEEE 802.15.4, exactly as it appears in the spec (no padding, no + * nothing), and with no FCS at the end of the frame; requested by + * Jon Smirl . + */ +#define DLT_IEEE802_15_4_NOFCS 230 + +/* + * Raw D-Bus: + * + * http://www.freedesktop.org/wiki/Software/dbus + * + * messages: + * + * http://dbus.freedesktop.org/doc/dbus-specification.html#message-protocol-messages + * + * starting with the endianness flag, followed by the message type, etc., + * but without the authentication handshake before the message sequence: + * + * http://dbus.freedesktop.org/doc/dbus-specification.html#auth-protocol + * + * Requested by Martin Vidner . + */ +#define DLT_DBUS 231 + +/* + * Juniper-private data link type, as per request from + * Hannes Gredler . + */ +#define DLT_JUNIPER_VS 232 +#define DLT_JUNIPER_SRX_E2E 233 +#define DLT_JUNIPER_FIBRECHANNEL 234 + +/* + * DVB-CI (DVB Common Interface for communication between a PC Card + * module and a DVB receiver). See + * + * http://www.kaiser.cx/pcap-dvbci.html + * + * for the specification. + * + * Requested by Martin Kaiser . + */ +#define DLT_DVB_CI 235 + +/* + * Variant of 3GPP TS 27.010 multiplexing protocol (similar to, but + * *not* the same as, 27.010). Requested by Hans-Christoph Schemmel + * . + */ +#define DLT_MUX27010 236 + +/* + * STANAG 5066 D_PDUs. Requested by M. Baris Demiray + * . 
+ */ +#define DLT_STANAG_5066_D_PDU 237 + +/* + * Juniper-private data link type, as per request from + * Hannes Gredler . + */ +#define DLT_JUNIPER_ATM_CEMIC 238 + +/* + * NetFilter LOG messages + * (payload of netlink NFNL_SUBSYS_ULOG/NFULNL_MSG_PACKET packets) + * + * Requested by Jakub Zawadzki + */ +#define DLT_NFLOG 239 + +/* + * Hilscher Gesellschaft fuer Systemautomation mbH link-layer type + * for Ethernet packets with a 4-byte pseudo-header and always + * with the payload including the FCS, as supplied by their + * netANALYZER hardware and software. + * + * Requested by Holger P. Frommer + */ +#define DLT_NETANALYZER 240 + +/* + * Hilscher Gesellschaft fuer Systemautomation mbH link-layer type + * for Ethernet packets with a 4-byte pseudo-header and FCS and + * with the Ethernet header preceded by 7 bytes of preamble and + * 1 byte of SFD, as supplied by their netANALYZER hardware and + * software. + * + * Requested by Holger P. Frommer + */ +#define DLT_NETANALYZER_TRANSPARENT 241 + +/* + * IP-over-InfiniBand, as specified by RFC 4391. + * + * Requested by Petr Sumbera . + */ +#define DLT_IPOIB 242 + +/* + * MPEG-2 transport stream (ISO 13818-1/ITU-T H.222.0). + * + * Requested by Guy Martin . + */ +#define DLT_MPEG_2_TS 243 + +/* + * ng4T GmbH's UMTS Iub/Iur-over-ATM and Iub/Iur-over-IP format as + * used by their ng40 protocol tester. + * + * Requested by Jens Grimmer . + */ +#define DLT_NG40 244 + +/* + * Pseudo-header giving adapter number and flags, followed by an NFC + * (Near-Field Communications) Logical Link Control Protocol (LLCP) PDU, + * as specified by NFC Forum Logical Link Control Protocol Technical + * Specification LLCP 1.1. + * + * Requested by Mike Wakerly . + */ +#define DLT_NFC_LLCP 245 + +/* + * 246 is used as LINKTYPE_PFSYNC; do not use it for any other purpose. + * + * DLT_PFSYNC has different values on different platforms, and all of + * them collide with something used elsewhere. 
On platforms that + * don't already define it, define it as 246. + */ +#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__) && !defined(__DragonFly__) && !defined(__APPLE__) +#define DLT_PFSYNC 246 +#endif + +/* + * Raw InfiniBand packets, starting with the Local Routing Header. + * + * Requested by Oren Kladnitsky . + */ +#define DLT_INFINIBAND 247 + +/* + * SCTP, with no lower-level protocols (i.e., no IPv4 or IPv6). + * + * Requested by Michael Tuexen . + */ +#define DLT_SCTP 248 + +/* + * USB packets, beginning with a USBPcap header. + * + * Requested by Tomasz Mon + */ +#define DLT_USBPCAP 249 + +/* + * Schweitzer Engineering Laboratories "RTAC" product serial-line + * packets. + * + * Requested by Chris Bontje . + */ +#define DLT_RTAC_SERIAL 250 + +/* + * Bluetooth Low Energy air interface link-layer packets. + * + * Requested by Mike Kershaw . + */ +#define DLT_BLUETOOTH_LE_LL 251 + +/* + * DLT type for upper-protocol layer PDU saves from wireshark. + * + * the actual contents are determined by two TAGs stored with each + * packet: + * EXP_PDU_TAG_LINKTYPE the link type (LINKTYPE_ value) of the + * original packet. + * + * EXP_PDU_TAG_PROTO_NAME the name of the wireshark dissector + * that can make sense of the data stored. + */ +#define DLT_WIRESHARK_UPPER_PDU 252 + +/* + * DLT type for the netlink protocol (nlmon devices). + */ +#define DLT_NETLINK 253 + +/* + * Bluetooth Linux Monitor headers for the BlueZ stack. + */ +#define DLT_BLUETOOTH_LINUX_MONITOR 254 + +/* + * Bluetooth Basic Rate/Enhanced Data Rate baseband packets, as + * captured by Ubertooth. + */ +#define DLT_BLUETOOTH_BREDR_BB 255 + +/* + * Bluetooth Low Energy link layer packets, as captured by Ubertooth. + */ +#define DLT_BLUETOOTH_LE_LL_WITH_PHDR 256 + +/* + * PROFIBUS data link layer. + */ +#define DLT_PROFIBUS_DL 257 + +/* + * Apple's DLT_PKTAP headers. 
+ * + * Sadly, the folks at Apple either had no clue that the DLT_USERn values + * are for internal use within an organization and partners only, and + * didn't know that the right way to get a link-layer header type is to + * ask tcpdump.org for one, or knew and didn't care, so they just + * used DLT_USER2, which causes problems for everything except for + * their version of tcpdump. + * + * So I'll just give them one; hopefully this will show up in a + * libpcap release in time for them to get this into 10.10 Big Sur + * or whatever Mavericks' successor is called. LINKTYPE_PKTAP + * will be 258 *even on OS X*; that is *intentional*, so that + * PKTAP files look the same on *all* OSes (different OSes can have + * different numerical values for a given DLT_, but *MUST NOT* have + * different values for what goes in a file, as files can be moved + * between OSes!). + * + * When capturing, on a system with a Darwin-based OS, on a device + * that returns 149 (DLT_USER2 and Apple's DLT_PKTAP) with this + * version of libpcap, the DLT_ value for the pcap_t will be DLT_PKTAP, + * and that will continue to be DLT_USER2 on Darwin-based OSes. That way, + * binary compatibility with Mavericks is preserved for programs using + * this version of libpcap. This does mean that if you were using + * DLT_USER2 for some capture device on OS X, you can't do so with + * this version of libpcap, just as you can't with Apple's libpcap - + * on OS X, they define DLT_PKTAP to be DLT_USER2, so programs won't + * be able to distinguish between PKTAP and whatever you were using + * DLT_USER2 for. + * + * If the program saves the capture to a file using this version of + * libpcap's pcap_dump code, the LINKTYPE_ value in the file will be + * LINKTYPE_PKTAP, which will be 258, even on Darwin-based OSes. + * That way, the file will *not* be a DLT_USER2 file. 
That means + * that the latest version of tcpdump, when built with this version + * of libpcap, and sufficiently recent versions of Wireshark will + * be able to read those files and interpret them correctly; however, + * Apple's version of tcpdump in OS X 10.9 won't be able to handle + * them. (Hopefully, Apple will pick up this version of libpcap, + * and the corresponding version of tcpdump, so that tcpdump will + * be able to handle the old LINKTYPE_USER2 captures *and* the new + * LINKTYPE_PKTAP captures.) + */ +#ifdef __APPLE__ +#define DLT_PKTAP DLT_USER2 +#else +#define DLT_PKTAP 258 +#endif + +/* + * Ethernet packets preceded by a header giving the last 6 octets + * of the preamble specified by 802.3-2012 Clause 65, section + * 65.1.3.2 "Transmit". + */ +#define DLT_EPON 259 + +/* + * IPMI trace packets, as specified by Table 3-20 "Trace Data Block Format" + * in the PICMG HPM.2 specification. + */ +#define DLT_IPMI_HPM_2 260 + +/* + * per Joshua Wright , formats for Zwave captures. + */ +#define DLT_ZWAVE_R1_R2 261 +#define DLT_ZWAVE_R3 262 + +/* + * per Steve Karg , formats for Wattstopper + * Digital Lighting Management room bus serial protocol captures. + */ +#define DLT_WATTSTOPPER_DLM 263 + +/* + * ISO 14443 contactless smart card messages. + */ +#define DLT_ISO_14443 264 + +/* + * Radio data system (RDS) groups. IEC 62106. + * Per Jonathan Brucker . + */ +#define DLT_RDS 265 + +/* + * In case the code that includes this file (directly or indirectly) + * has also included OS files that happen to define DLT_MATCHING_MAX, + * with a different value (perhaps because that OS hasn't picked up + * the latest version of our DLT definitions), we undefine the + * previous value of DLT_MATCHING_MAX. + */ +#ifdef DLT_MATCHING_MAX +#undef DLT_MATCHING_MAX +#endif +#define DLT_MATCHING_MAX 265 /* highest value in the "matching" range */ + +/* + * DLT and savefile link type values are split into a class and + * a member of that class. 
A class value of 0 indicates a regular + * DLT_/LINKTYPE_ value. + */ +#define DLT_CLASS(x) ((x) & 0x03ff0000) + +/* + * NetBSD-specific generic "raw" link type. The class value indicates + * that this is the generic raw type, and the lower 16 bits are the + * address family we're dealing with. Those values are NetBSD-specific; + * do not assume that they correspond to AF_ values for your operating + * system. + */ +#define DLT_CLASS_NETBSD_RAWAF 0x02240000 +#define DLT_NETBSD_RAWAF(af) (DLT_CLASS_NETBSD_RAWAF | (af)) +#define DLT_NETBSD_RAWAF_AF(x) ((x) & 0x0000ffff) +#define DLT_IS_NETBSD_RAWAF(x) (DLT_CLASS(x) == DLT_CLASS_NETBSD_RAWAF) + +#endif /* !_NET_DLT_H_ */ diff --git a/freebsd/sys/net/ieee8023ad_lacp.c b/freebsd/sys/net/ieee8023ad_lacp.c index 619db8af..b20a391f 100644 --- a/freebsd/sys/net/ieee8023ad_lacp.c +++ b/freebsd/sys/net/ieee8023ad_lacp.c @@ -32,6 +32,8 @@ #include __FBSDID("$FreeBSD$"); +#include + #include #include #include @@ -528,9 +530,6 @@ lacp_port_create(struct lagg_port *lgp) struct ifmultiaddr *rifma = NULL; int error; - boolean_t active = TRUE; /* XXX should be configurable */ - boolean_t fast = FALSE; /* Configurable via ioctl */ - link_init_sdl(ifp, (struct sockaddr *)&sdl, IFT_ETHER); sdl.sdl_alen = ETHER_ADDR_LEN; @@ -559,9 +558,7 @@ lacp_port_create(struct lagg_port *lgp) lacp_fill_actorinfo(lp, &lp->lp_actor); lacp_fill_markerinfo(lp, &lp->lp_marker); - lp->lp_state = - (active ? LACP_STATE_ACTIVITY : 0) | - (fast ? 
LACP_STATE_TIMEOUT : 0); + lp->lp_state = LACP_STATE_ACTIVITY; lp->lp_aggregator = NULL; lacp_sm_rx_set_expired(lp); LACP_UNLOCK(lsc); @@ -855,6 +852,35 @@ lacp_select_tx_port(struct lagg_softc *sc, struct mbuf *m) return (lp->lp_lagg); } + +#ifdef RATELIMIT +struct lagg_port * +lacp_select_tx_port_by_hash(struct lagg_softc *sc, uint32_t flowid) +{ + struct lacp_softc *lsc = LACP_SOFTC(sc); + struct lacp_portmap *pm; + struct lacp_port *lp; + uint32_t hash; + + if (__predict_false(lsc->lsc_suppress_distributing)) { + LACP_DPRINTF((NULL, "%s: waiting transit\n", __func__)); + return (NULL); + } + + pm = &lsc->lsc_pmap[lsc->lsc_activemap]; + if (pm->pm_count == 0) { + LACP_DPRINTF((NULL, "%s: no active aggregator\n", __func__)); + return (NULL); + } + + hash = flowid >> sc->flowid_shift; + hash %= pm->pm_count; + lp = pm->pm_map[hash]; + + return (lp->lp_lagg); +} +#endif + /* * lacp_suppress_distributing: drop transmit packets for a while * to preserve packet ordering. @@ -1307,6 +1333,10 @@ lacp_select(struct lacp_port *lp) return; } + /* If we haven't heard from our peer, skip this step. */ + if (lp->lp_state & LACP_STATE_DEFAULTED) + return; + KASSERT(!LACP_TIMER_ISARMED(lp, LACP_TIMER_WAIT_WHILE), ("timer_wait_while still active")); @@ -1662,7 +1692,15 @@ lacp_sm_rx_record_pdu(struct lacp_port *lp, const struct lacpdu *du) LACP_STATE_AGGREGATION) && !lacp_compare_peerinfo(&lp->lp_actor, &du->ldu_partner)) || (du->ldu_partner.lip_state & LACP_STATE_AGGREGATION) == 0)) { - /* XXX nothing? */ + /* + * XXX Maintain legacy behavior of leaving the + * LACP_STATE_SYNC bit unchanged from the partner's + * advertisement if lsc_strict_mode is false. + * TODO: We should re-examine the concept of the "strict mode" + * to ensure it makes sense to maintain a non-strict mode. 
+ */ + if (lp->lp_lsc->lsc_strict_mode) + lp->lp_partner.lip_state |= LACP_STATE_SYNC; } else { lp->lp_partner.lip_state &= ~LACP_STATE_SYNC; } @@ -1677,10 +1715,6 @@ lacp_sm_rx_record_pdu(struct lacp_port *lp, const struct lacpdu *du) sizeof(buf)))); } - /* XXX Hack, still need to implement 5.4.9 para 2,3,4 */ - if (lp->lp_lsc->lsc_strict_mode) - lp->lp_partner.lip_state |= LACP_STATE_SYNC; - lacp_sm_ptx_update_timeout(lp, oldpstate); } diff --git a/freebsd/sys/net/ieee8023ad_lacp.h b/freebsd/sys/net/ieee8023ad_lacp.h index 8f0f51a7..b26e2c92 100644 --- a/freebsd/sys/net/ieee8023ad_lacp.h +++ b/freebsd/sys/net/ieee8023ad_lacp.h @@ -284,6 +284,9 @@ struct lacp_softc { struct mbuf *lacp_input(struct lagg_port *, struct mbuf *); struct lagg_port *lacp_select_tx_port(struct lagg_softc *, struct mbuf *); +#ifdef RATELIMIT +struct lagg_port *lacp_select_tx_port_by_hash(struct lagg_softc *, uint32_t); +#endif void lacp_attach(struct lagg_softc *); void lacp_detach(void *); void lacp_init(struct lagg_softc *); diff --git a/freebsd/sys/net/if.c b/freebsd/sys/net/if.c index 882eee5c..9c98366a 100644 --- a/freebsd/sys/net/if.c +++ b/freebsd/sys/net/if.c @@ -12,7 +12,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* @@ -146,7 +146,7 @@ int (*carp_output_p)(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa); int (*carp_ioctl_p)(struct ifreq *, u_long, struct thread *); int (*carp_attach_p)(struct ifaddr *, int); -void (*carp_detach_p)(struct ifaddr *); +void (*carp_detach_p)(struct ifaddr *, bool); #endif #ifdef INET int (*carp_iamatch_p)(struct ifaddr *, uint8_t **); @@ -2220,6 +2220,7 @@ void if_down(struct ifnet *ifp) { + EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_DOWN); if_unroute(ifp, IFF_UP, AF_UNSPEC); } @@ -2232,6 +2233,7 @@ if_up(struct ifnet *ifp) { if_route(ifp, IFF_UP, AF_UNSPEC); + EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_UP); } /* @@ -2703,9 +2705,6 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td) return (error); } -/* COMPAT_SVR4 */ -#define OSIOCGIFCONF _IOWR('i', 20, struct ifconf) - #ifdef COMPAT_FREEBSD32 struct ifconf32 { int32_t ifc_len; @@ -2745,7 +2744,6 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td) switch (cmd) { case SIOCGIFCONF: - case OSIOCGIFCONF: /* COMPAT_SVR4 */ error = ifconf(cmd, data); CURVNET_RESTORE(); return (error); @@ -3017,15 +3015,6 @@ again: if (prison_if(curthread->td_ucred, sa) != 0) continue; addrs++; - /* COMPAT_SVR4 */ - if (cmd == OSIOCGIFCONF) { - struct osockaddr *osa = - (struct osockaddr *)&ifr.ifr_addr; - ifr.ifr_addr = *sa; - osa->sa_family = sa->sa_family; - sbuf_bcat(sb, &ifr, sizeof(ifr)); - max_len += sizeof(ifr); - } else if (sa->sa_len <= sizeof(*sa)) { ifr.ifr_addr = *sa; sbuf_bcat(sb, &ifr, sizeof(ifr)); @@ -3530,7 +3519,6 @@ if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len) case IFT_BRIDGE: case IFT_ARCNET: case IFT_IEEE8023ADLAG: - case IFT_IEEE80211: bcopy(lladdr, LLADDR(sdl), len); ifa_free(ifa); break; @@ -4104,6 +4092,51 @@ if_vlancap(if_t ifh) VLAN_CAPABILITIES(ifp); } +int +if_sethwtsomax(if_t ifp, u_int if_hw_tsomax) +{ + + ((struct ifnet *)ifp)->if_hw_tsomax = if_hw_tsomax; + return (0); +} + +int 
+if_sethwtsomaxsegcount(if_t ifp, u_int if_hw_tsomaxsegcount) +{ + + ((struct ifnet *)ifp)->if_hw_tsomaxsegcount = if_hw_tsomaxsegcount; + return (0); +} + +int +if_sethwtsomaxsegsize(if_t ifp, u_int if_hw_tsomaxsegsize) +{ + + ((struct ifnet *)ifp)->if_hw_tsomaxsegsize = if_hw_tsomaxsegsize; + return (0); +} + +u_int +if_gethwtsomax(if_t ifp) +{ + + return (((struct ifnet *)ifp)->if_hw_tsomax); +} + +u_int +if_gethwtsomaxsegcount(if_t ifp) +{ + + return (((struct ifnet *)ifp)->if_hw_tsomaxsegcount); +} + +u_int +if_gethwtsomaxsegsize(if_t ifp) +{ + + return (((struct ifnet *)ifp)->if_hw_tsomaxsegsize); +} + void if_setinitfn(if_t ifp, void (*init_fn)(void *)) { diff --git a/freebsd/sys/net/if.h b/freebsd/sys/net/if.h index 98ae0a82..e3b801b8 100644 --- a/freebsd/sys/net/if.h +++ b/freebsd/sys/net/if.h @@ -10,7 +10,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * @@ -239,6 +239,7 @@ struct if_data { #define IFCAP_RXCSUM_IPV6 0x200000 /* can offload checksum on IPv6 RX */ #define IFCAP_TXCSUM_IPV6 0x400000 /* can offload checksum on IPv6 TX */ #define IFCAP_HWSTATS 0x800000 /* manages counters internally */ +#define IFCAP_TXRTLMT 0x1000000 /* hardware supports TX rate limiting */ #define IFCAP_HWCSUM_IPV6 (IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6) diff --git a/freebsd/sys/net/if_arc.h b/freebsd/sys/net/if_arc.h index 23139aa6..b2444e72 100644 --- a/freebsd/sys/net/if_arc.h +++ b/freebsd/sys/net/if_arc.h @@ -13,7 +13,7 @@ * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/net/if_arp.h b/freebsd/sys/net/if_arp.h index 7d141f37..82448026 100644 --- a/freebsd/sys/net/if_arp.h +++ b/freebsd/sys/net/if_arp.h @@ -10,7 +10,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* diff --git a/freebsd/sys/net/if_bridge.c b/freebsd/sys/net/if_bridge.c index fc0dbffd..31e92095 100644 --- a/freebsd/sys/net/if_bridge.c +++ b/freebsd/sys/net/if_bridge.c @@ -911,14 +911,18 @@ bridge_mutecaps(struct bridge_softc *sc) mask &= bif->bif_savedcaps; } + BRIDGE_XLOCK(sc); LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { enabled = bif->bif_ifp->if_capenable; enabled &= ~BRIDGE_IFCAPS_STRIP; /* strip off mask bits and enable them again if allowed */ enabled &= ~BRIDGE_IFCAPS_MASK; enabled |= mask; + BRIDGE_UNLOCK(sc); bridge_set_ifcap(sc, bif, enabled); + BRIDGE_LOCK(sc); } + BRIDGE_XDROP(sc); } @@ -929,6 +933,8 @@ bridge_set_ifcap(struct bridge_softc *sc, struct bridge_iflist *bif, int set) struct ifreq ifr; int error; + BRIDGE_UNLOCK_ASSERT(sc); + bzero(&ifr, sizeof(ifr)); ifr.ifr_reqcap = set; diff --git a/freebsd/sys/net/if_bridgevar.h b/freebsd/sys/net/if_bridgevar.h index 3210c03b..480c90af 100644 --- a/freebsd/sys/net/if_bridgevar.h +++ b/freebsd/sys/net/if_bridgevar.h @@ -280,6 +280,7 @@ struct ifbpstpconf { #define BRIDGE_LOCK(_sc) mtx_lock(&(_sc)->sc_mtx) #define BRIDGE_UNLOCK(_sc) mtx_unlock(&(_sc)->sc_mtx) #define BRIDGE_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->sc_mtx, MA_OWNED) +#define BRIDGE_UNLOCK_ASSERT(_sc) mtx_assert(&(_sc)->sc_mtx, MA_NOTOWNED) #define BRIDGE_LOCK2REF(_sc, _err) do { \ mtx_assert(&(_sc)->sc_mtx, MA_OWNED); \ if ((_sc)->sc_iflist_xcnt > 0) \ diff --git a/freebsd/sys/net/if_clone.c b/freebsd/sys/net/if_clone.c index 61ba9c6c..cbc56c29 100644 --- a/freebsd/sys/net/if_clone.c +++ b/freebsd/sys/net/if_clone.c @@ -13,7 +13,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/net/if_clone.h b/freebsd/sys/net/if_clone.h index 3a60b0a1..d55aba20 100644 --- a/freebsd/sys/net/if_clone.h +++ b/freebsd/sys/net/if_clone.h @@ -10,7 +10,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/net/if_dead.c b/freebsd/sys/net/if_dead.c index e290823c..fc584f29 100644 --- a/freebsd/sys/net/if_dead.c +++ b/freebsd/sys/net/if_dead.c @@ -102,6 +102,30 @@ ifdead_get_counter(struct ifnet *ifp, ift_counter cnt) return (0); } +static int +ifdead_snd_tag_alloc(struct ifnet *ifp, union if_snd_tag_alloc_params *params, + struct m_snd_tag **ppmt) +{ + return (EOPNOTSUPP); +} + +static int +ifdead_snd_tag_modify(struct m_snd_tag *pmt, union if_snd_tag_modify_params *params) +{ + return (EOPNOTSUPP); +} + +static int +ifdead_snd_tag_query(struct m_snd_tag *pmt, union if_snd_tag_query_params *params) +{ + return (EOPNOTSUPP); +} + +static void +ifdead_snd_tag_free(struct m_snd_tag *pmt) +{ +} + void if_dead(struct ifnet *ifp) { @@ -114,4 +138,8 @@ if_dead(struct ifnet *ifp) ifp->if_qflush = ifdead_qflush; ifp->if_transmit = ifdead_transmit; ifp->if_get_counter = ifdead_get_counter; + ifp->if_snd_tag_alloc = ifdead_snd_tag_alloc; + ifp->if_snd_tag_modify = ifdead_snd_tag_modify; + ifp->if_snd_tag_query = ifdead_snd_tag_query; + ifp->if_snd_tag_free = ifdead_snd_tag_free; } diff --git 
a/freebsd/sys/net/if_disc.c b/freebsd/sys/net/if_disc.c index a2e5a7e8..aaaea715 100644 --- a/freebsd/sys/net/if_disc.c +++ b/freebsd/sys/net/if_disc.c @@ -12,7 +12,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/net/if_dl.h b/freebsd/sys/net/if_dl.h index f53bc5e4..5c4f273f 100644 --- a/freebsd/sys/net/if_dl.h +++ b/freebsd/sys/net/if_dl.h @@ -10,7 +10,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/net/if_edsc.c b/freebsd/sys/net/if_edsc.c index d90f072a..cff77c6b 100644 --- a/freebsd/sys/net/if_edsc.c +++ b/freebsd/sys/net/if_edsc.c @@ -12,7 +12,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following edsclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* diff --git a/freebsd/sys/net/if_enc.c b/freebsd/sys/net/if_enc.c index d0d065b8..80be97ca 100644 --- a/freebsd/sys/net/if_enc.c +++ b/freebsd/sys/net/if_enc.c @@ -425,3 +425,4 @@ static moduledata_t enc_mod = { }; DECLARE_MODULE(if_enc, enc_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); +MODULE_VERSION(if_enc, 1); diff --git a/freebsd/sys/net/if_epair.c b/freebsd/sys/net/if_epair.c index b4f73d68..13294912 100644 --- a/freebsd/sys/net/if_epair.c +++ b/freebsd/sys/net/if_epair.c @@ -833,7 +833,8 @@ epair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) ifp->if_start = epair_start; ifp->if_ioctl = epair_ioctl; ifp->if_init = epair_init; - ifp->if_snd.ifq_maxlen = ifqmaxlen; + if_setsendqlen(ifp, ifqmaxlen); + if_setsendqready(ifp); /* Assign a hopefully unique, locally administered etheraddr. */ eaddr[0] = 0x02; eaddr[3] = (ifp->if_index >> 8) & 0xff; @@ -859,7 +860,8 @@ epair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) ifp->if_start = epair_start; ifp->if_ioctl = epair_ioctl; ifp->if_init = epair_init; - ifp->if_snd.ifq_maxlen = ifqmaxlen; + if_setsendqlen(ifp, ifqmaxlen); + if_setsendqready(ifp); /* We need to play some tricks here for the second interface. */ strlcpy(name, epairname, len); error = if_clone_create(name, len, (caddr_t)scb); diff --git a/freebsd/sys/net/if_ethersubr.c b/freebsd/sys/net/if_ethersubr.c index 1d22c0a6..a9f20571 100644 --- a/freebsd/sys/net/if_ethersubr.c +++ b/freebsd/sys/net/if_ethersubr.c @@ -12,7 +12,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* diff --git a/freebsd/sys/net/if_fddisubr.c b/freebsd/sys/net/if_fddisubr.c index 98ac4cc3..3e7983c1 100644 --- a/freebsd/sys/net/if_fddisubr.c +++ b/freebsd/sys/net/if_fddisubr.c @@ -402,7 +402,7 @@ fddi_input(ifp, m) m_adj(m, FDDI_HDR_LEN); m = m_pullup(m, LLC_SNAPFRAMELEN); - if (m == 0) { + if (m == NULL) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); goto dropanyway; } diff --git a/freebsd/sys/net/if_fwsubr.c b/freebsd/sys/net/if_fwsubr.c index df4c38cf..3414daf1 100644 --- a/freebsd/sys/net/if_fwsubr.c +++ b/freebsd/sys/net/if_fwsubr.c @@ -13,7 +13,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* diff --git a/freebsd/sys/net/if_iso88025subr.c b/freebsd/sys/net/if_iso88025subr.c index 38322b23..bd9fcfa4 100644 --- a/freebsd/sys/net/if_iso88025subr.c +++ b/freebsd/sys/net/if_iso88025subr.c @@ -489,7 +489,7 @@ iso88025_input(ifp, m) m_adj(m, mac_hdr_len); m = m_pullup(m, LLC_SNAPFRAMELEN); - if (m == 0) { + if (m == NULL) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); goto dropanyway; } diff --git a/freebsd/sys/net/if_lagg.c b/freebsd/sys/net/if_lagg.c index b5b61364..41fc35f0 100644 --- a/freebsd/sys/net/if_lagg.c +++ b/freebsd/sys/net/if_lagg.c @@ -25,6 +25,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include @@ -120,6 +121,11 @@ static void lagg_port2req(struct lagg_port *, struct lagg_reqport *); static void lagg_init(void *); static void lagg_stop(struct lagg_softc *); static int lagg_ioctl(struct ifnet *, u_long, caddr_t); +#ifdef RATELIMIT +static int lagg_snd_tag_alloc(struct ifnet *, + union if_snd_tag_alloc_params *, + struct m_snd_tag **); +#endif static int lagg_ether_setmulti(struct lagg_softc *); static int lagg_ether_cmdmulti(struct lagg_port *, int); static int lagg_setflag(struct lagg_port *, int, int, @@ -505,7 +511,12 @@ lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params) ifp->if_ioctl = lagg_ioctl; ifp->if_get_counter = lagg_get_counter; ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST; +#ifdef RATELIMIT + ifp->if_snd_tag_alloc = lagg_snd_tag_alloc; + ifp->if_capenable = ifp->if_capabilities = IFCAP_HWSTATS | IFCAP_TXRTLMT; +#else ifp->if_capenable = ifp->if_capabilities = IFCAP_HWSTATS; +#endif /* * Attach as an ordinary ethernet device, children will be attached @@ -541,12 +552,15 @@ lagg_clone_destroy(struct ifnet *ifp) EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vlan_detach); /* Shutdown and remove lagg ports */ - while ((lp = SLIST_FIRST(&sc->sc_ports)) != NULL) + while ((lp = SLIST_FIRST(&sc->sc_ports)) != NULL) { + lp->lp_detaching = LAGG_CLONE_DESTROY; lagg_port_destroy(lp, 1); + } /* 
Unhook the aggregation protocol */ lagg_proto_detach(sc); LAGG_UNLOCK_ASSERT(sc); + taskqueue_drain(taskqueue_swi, &sc->sc_lladdr_task); ifmedia_removeall(&sc->sc_media); ether_ifdetach(ifp); if_free(ifp); @@ -555,7 +569,6 @@ lagg_clone_destroy(struct ifnet *ifp) SLIST_REMOVE(&V_lagg_list, sc, lagg_softc, sc_entries); LAGG_LIST_UNLOCK(); - taskqueue_drain(taskqueue_swi, &sc->sc_lladdr_task); LAGG_LOCK_DESTROY(sc); free(sc, M_DEVBUF); } @@ -893,7 +906,7 @@ lagg_port_destroy(struct lagg_port *lp, int rundelport) * Remove multicast addresses and interface flags from this port and * reset the MAC address, skip if the interface is being detached. */ - if (!lp->lp_detaching) { + if (lp->lp_detaching == 0) { lagg_ether_cmdmulti(lp, 0); lagg_setflags(lp, 0); lagg_port_lladdr(lp, lp->lp_lladdr, LAGG_LLQTYPE_PHYS); @@ -926,7 +939,8 @@ lagg_port_destroy(struct lagg_port *lp, int rundelport) bcopy(lp0->lp_lladdr, lladdr, ETHER_ADDR_LEN); } - lagg_lladdr(sc, lladdr); + if (lp->lp_detaching != LAGG_CLONE_DESTROY) + lagg_lladdr(sc, lladdr); /* Mark lp0 as new primary */ sc->sc_primary = lp0; @@ -941,7 +955,7 @@ lagg_port_destroy(struct lagg_port *lp, int rundelport) } /* Remove any pending lladdr changes from the queue */ - if (lp->lp_detaching) { + if (lp->lp_detaching != 0) { SLIST_FOREACH(llq, &sc->sc_llq_head, llq_entries) { if (llq->llq_ifp == ifp) { SLIST_REMOVE(&sc->sc_llq_head, llq, lagg_llq, @@ -1120,7 +1134,7 @@ lagg_port_ifdetach(void *arg __unused, struct ifnet *ifp) sc = lp->lp_softc; LAGG_WLOCK(sc); - lp->lp_detaching = 1; + lp->lp_detaching = LAGG_PORT_DETACH; lagg_port_destroy(lp, 1); LAGG_WUNLOCK(sc); } @@ -1551,6 +1565,52 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) return (error); } +#ifdef RATELIMIT +static int +lagg_snd_tag_alloc(struct ifnet *ifp, + union if_snd_tag_alloc_params *params, + struct m_snd_tag **ppmt) +{ + struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc; + struct lagg_port *lp; + struct lagg_lb *lb; + uint32_t p; + + 
switch (sc->sc_proto) { + case LAGG_PROTO_FAILOVER: + lp = lagg_link_active(sc, sc->sc_primary); + break; + case LAGG_PROTO_LOADBALANCE: + if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) == 0 || + params->hdr.flowtype == M_HASHTYPE_NONE) + return (EOPNOTSUPP); + p = params->hdr.flowid >> sc->flowid_shift; + p %= sc->sc_count; + lb = (struct lagg_lb *)sc->sc_psc; + lp = lb->lb_ports[p]; + lp = lagg_link_active(sc, lp); + break; + case LAGG_PROTO_LACP: + if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) == 0 || + params->hdr.flowtype == M_HASHTYPE_NONE) + return (EOPNOTSUPP); + lp = lacp_select_tx_port_by_hash(sc, params->hdr.flowid); + break; + default: + return (EOPNOTSUPP); + } + if (lp == NULL) + return (EOPNOTSUPP); + ifp = lp->lp_ifp; + if (ifp == NULL || ifp->if_snd_tag_alloc == NULL || + (ifp->if_capenable & IFCAP_TXRTLMT) == 0) + return (EOPNOTSUPP); + + /* forward allocation request */ + return (ifp->if_snd_tag_alloc(ifp, params, ppmt)); +} +#endif + static int lagg_ether_setmulti(struct lagg_softc *sc) { @@ -1605,7 +1665,7 @@ lagg_ether_cmdmulti(struct lagg_port *lp, int set) } else { while ((mc = SLIST_FIRST(&lp->lp_mc_head)) != NULL) { SLIST_REMOVE(&lp->lp_mc_head, mc, lagg_mc, mc_entries); - if (mc->mc_ifma && !lp->lp_detaching) + if (mc->mc_ifma && lp->lp_detaching == 0) if_delmulti_ifma(mc->mc_ifma); free(mc, M_DEVBUF); } diff --git a/freebsd/sys/net/if_lagg.h b/freebsd/sys/net/if_lagg.h index 334995e5..81eeeb89 100644 --- a/freebsd/sys/net/if_lagg.h +++ b/freebsd/sys/net/if_lagg.h @@ -261,6 +261,8 @@ struct lagg_port { void *lh_cookie; /* if state hook */ void *lp_psc; /* protocol data */ int lp_detaching; /* ifnet is detaching */ +#define LAGG_PORT_DETACH 0x01 /* detach lagg port */ +#define LAGG_CLONE_DESTROY 0x02 /* destroy lagg clone */ SLIST_HEAD(__mclhd, lagg_mc) lp_mc_head; /* multicast addresses */ diff --git a/freebsd/sys/net/if_llc.h b/freebsd/sys/net/if_llc.h index 0d96372e..7a959b9c 100644 --- a/freebsd/sys/net/if_llc.h +++ b/freebsd/sys/net/if_llc.h @@ 
-12,7 +12,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/net/if_loop.c b/freebsd/sys/net/if_loop.c index 5ee82fc0..fd650270 100644 --- a/freebsd/sys/net/if_loop.c +++ b/freebsd/sys/net/if_loop.c @@ -12,7 +12,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* diff --git a/freebsd/sys/net/if_media.c b/freebsd/sys/net/if_media.c index 66b13568..d8167d14 100644 --- a/freebsd/sys/net/if_media.c +++ b/freebsd/sys/net/if_media.c @@ -109,6 +109,7 @@ ifmedia_removeall(ifm) LIST_REMOVE(entry, ifm_list); free(entry, M_IFADDR); } + ifm->ifm_cur = NULL; } /* diff --git a/freebsd/sys/net/if_stf.c b/freebsd/sys/net/if_stf.c index 7c1b7075..e07cf0fe 100644 --- a/freebsd/sys/net/if_stf.c +++ b/freebsd/sys/net/if_stf.c @@ -204,10 +204,16 @@ stf_clone_match(struct if_clone *ifc, const char *name) static int stf_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) { - int err, unit; + char *dp; + int err, unit, wildcard; struct stf_softc *sc; struct ifnet *ifp; + err = ifc_name2unit(name, &unit); + if (err != 0) + return (err); + wildcard = (unit < 0); + /* * We can only have one unit, but since unit allocation is * already locked, we use it to keep from allocating extra @@ -235,7 +241,20 @@ stf_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) /* * Set the name manually rather then using if_initname because * we don't conform to the default naming convention for interfaces. + * In the wildcard case, we need to update the name. */ + if (wildcard) { + for (dp = name; *dp != '\0'; dp++); + if (snprintf(dp, len - (dp-name), "%d", unit) > + len - (dp-name) - 1) { + /* + * This can only be a programmer error and + * there's no straightforward way to recover if + * it happens. 
+ */ + panic("if_clone_create(): interface name too long"); + } + } strlcpy(ifp->if_xname, name, IFNAMSIZ); ifp->if_dname = stfname; ifp->if_dunit = IF_DUNIT_NONE; @@ -327,8 +346,7 @@ stf_encapcheck(const struct mbuf *m, int off, int proto, void *arg) if (proto != IPPROTO_IPV6) return 0; - /* LINTED const cast */ - m_copydata((struct mbuf *)(uintptr_t)m, 0, sizeof(ip), (caddr_t)&ip); + m_copydata(m, 0, sizeof(ip), (caddr_t)&ip); if (ip.ip_v != 4) return 0; diff --git a/freebsd/sys/net/if_types.h b/freebsd/sys/net/if_types.h index 92e101ac..b9a752df 100644 --- a/freebsd/sys/net/if_types.h +++ b/freebsd/sys/net/if_types.h @@ -10,7 +10,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * @@ -113,7 +113,7 @@ typedef enum { IFT_QLLC = 0x44, /* SNA QLLC */ IFT_FASTETHERFX = 0x45, /* Fast Ethernet (100BaseFX) */ IFT_CHANNEL = 0x46, /* channel */ - IFT_IEEE80211 = 0x47, /* radio spread spectrum */ + IFT_IEEE80211 = 0x47, /* radio spread spectrum (unused) */ IFT_IBM370PARCHAN = 0x48, /* IBM System 360/370 OEMI Channel */ IFT_ESCON = 0x49, /* IBM Enterprise Systems Connection */ IFT_DLSW = 0x4a, /* Data Link Switching */ diff --git a/freebsd/sys/net/if_var.h b/freebsd/sys/net/if_var.h index 6ddb6c7b..4d93ed72 100644 --- a/freebsd/sys/net/if_var.h +++ b/freebsd/sys/net/if_var.h @@ -10,7 +10,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. 
Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * @@ -175,6 +175,49 @@ struct if_encap_req { #define IFENCAP_FLAG_BROADCAST 0x02 /* Destination is broadcast */ +/* + * Network interface send tag support. The storage of "struct + * m_snd_tag" comes from the network driver and it is free to allocate + * as much additional space as it wants for its own use. + */ +struct m_snd_tag; + +#define IF_SND_TAG_TYPE_RATE_LIMIT 0 +#define IF_SND_TAG_TYPE_MAX 1 + +struct if_snd_tag_alloc_header { + uint32_t type; /* send tag type, see IF_SND_TAG_XXX */ + uint32_t flowid; /* mbuf hash value */ + uint32_t flowtype; /* mbuf hash type */ +}; + +struct if_snd_tag_alloc_rate_limit { + struct if_snd_tag_alloc_header hdr; + uint64_t max_rate; /* in bytes/s */ +}; + +struct if_snd_tag_rate_limit_params { + uint64_t max_rate; /* in bytes/s */ +}; + +union if_snd_tag_alloc_params { + struct if_snd_tag_alloc_header hdr; + struct if_snd_tag_alloc_rate_limit rate_limit; +}; + +union if_snd_tag_modify_params { + struct if_snd_tag_rate_limit_params rate_limit; +}; + +union if_snd_tag_query_params { + struct if_snd_tag_rate_limit_params rate_limit; +}; + +typedef int (if_snd_tag_alloc_t)(struct ifnet *, union if_snd_tag_alloc_params *, + struct m_snd_tag **); +typedef int (if_snd_tag_modify_t)(struct m_snd_tag *, union if_snd_tag_modify_params *); +typedef int (if_snd_tag_query_t)(struct m_snd_tag *, union if_snd_tag_query_params *); +typedef void (if_snd_tag_free_t)(struct m_snd_tag *); /* * Structure defining a network interface. 
@@ -303,13 +346,20 @@ struct ifnet { u_int if_hw_tsomaxsegcount; /* TSO maximum segment count */ u_int if_hw_tsomaxsegsize; /* TSO maximum segment size in bytes */ + /* + * Network adapter send tag support: + */ + if_snd_tag_alloc_t *if_snd_tag_alloc; + if_snd_tag_modify_t *if_snd_tag_modify; + if_snd_tag_query_t *if_snd_tag_query; + if_snd_tag_free_t *if_snd_tag_free; + /* * Spare fields to be added before branching a stable branch, so * that structure can be enhanced without changing the kernel * binary interface. */ - void *if_pspare[4]; /* packet pacing / general use */ - int if_ispare[4]; /* packet pacing / general use */ + int if_ispare[4]; /* general use */ }; /* for compatibility with other BSDs */ @@ -354,6 +404,11 @@ EVENTHANDLER_DECLARE(ifnet_departure_event, ifnet_departure_event_handler_t); /* Interface link state change event */ typedef void (*ifnet_link_event_handler_t)(void *, struct ifnet *, int); EVENTHANDLER_DECLARE(ifnet_link_event, ifnet_link_event_handler_t); +/* Interface up/down event */ +#define IFNET_EVENT_UP 0 +#define IFNET_EVENT_DOWN 1 +typedef void (*ifnet_event_fn)(void *, struct ifnet *ifp, int event); +EVENTHANDLER_DECLARE(ifnet_event, ifnet_event_fn); #endif /* _SYS_EVENTHANDLER_H_ */ /* @@ -603,6 +658,12 @@ int if_getflags(if_t ifp); int if_sendq_empty(if_t ifp); int if_setsendqready(if_t ifp); int if_setsendqlen(if_t ifp, int tx_desc_count); +int if_sethwtsomax(if_t ifp, u_int if_hw_tsomax); +int if_sethwtsomaxsegcount(if_t ifp, u_int if_hw_tsomaxsegcount); +int if_sethwtsomaxsegsize(if_t ifp, u_int if_hw_tsomaxsegsize); +u_int if_gethwtsomax(if_t ifp); +u_int if_gethwtsomaxsegcount(if_t ifp); +u_int if_gethwtsomaxsegsize(if_t ifp); int if_input(if_t ifp, struct mbuf* sendmp); int if_sendq_prepend(if_t ifp, struct mbuf *m); struct mbuf *if_dequeue(if_t ifp); diff --git a/freebsd/sys/net/if_vlan.c b/freebsd/sys/net/if_vlan.c index 8a93565b..381231b6 100644 --- a/freebsd/sys/net/if_vlan.c +++ b/freebsd/sys/net/if_vlan.c @@ -48,6 
+48,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include @@ -214,6 +215,10 @@ static void trunk_destroy(struct ifvlantrunk *trunk); static void vlan_init(void *foo); static void vlan_input(struct ifnet *ifp, struct mbuf *m); static int vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr); +#ifdef RATELIMIT +static int vlan_snd_tag_alloc(struct ifnet *, + union if_snd_tag_alloc_params *, struct m_snd_tag **); +#endif static void vlan_qflush(struct ifnet *ifp); static int vlan_setflag(struct ifnet *ifp, int flag, int status, int (*func)(struct ifnet *, int)); @@ -973,6 +978,9 @@ vlan_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) ifp->if_transmit = vlan_transmit; ifp->if_qflush = vlan_qflush; ifp->if_ioctl = vlan_ioctl; +#ifdef RATELIMIT + ifp->if_snd_tag_alloc = vlan_snd_tag_alloc; +#endif ifp->if_flags = VLAN_IFFLAGS; ether_ifattach(ifp, eaddr); /* Now undo some of the damage... */ @@ -1593,6 +1601,15 @@ vlan_capabilities(struct ifvlan *ifv) TOEDEV(ifp) = TOEDEV(p); ifp->if_capenable |= p->if_capenable & IFCAP_TOE; } + +#ifdef RATELIMIT + /* + * If the parent interface supports ratelimiting, so does the + * VLAN interface. 
+ */ + ifp->if_capabilities |= (p->if_capabilities & IFCAP_TXRTLMT); + ifp->if_capenable |= (p->if_capenable & IFCAP_TXRTLMT); +#endif } static void @@ -1803,3 +1820,19 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) return (error); } + +#ifdef RATELIMIT +static int +vlan_snd_tag_alloc(struct ifnet *ifp, + union if_snd_tag_alloc_params *params, + struct m_snd_tag **ppmt) +{ + + /* get trunk device */ + ifp = vlan_trunkdev(ifp); + if (ifp == NULL || (ifp->if_capenable & IFCAP_TXRTLMT) == 0) + return (EOPNOTSUPP); + /* forward allocation request */ + return (ifp->if_snd_tag_alloc(ifp, params, ppmt)); +} +#endif diff --git a/freebsd/sys/net/iflib.h b/freebsd/sys/net/iflib.h new file mode 100644 index 00000000..f5f98cdf --- /dev/null +++ b/freebsd/sys/net/iflib.h @@ -0,0 +1,393 @@ +/*- + * Copyright (c) 2014-2017, Matthew Macy (mmacy@nextbsd.org) + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Neither the name of Matthew Macy nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ +#ifndef __IFLIB_H_ +#define __IFLIB_H_ + +#include +#include +#include +#include +#include +#include +#include + +/* + * The value type for indexing, limits max descriptors + * to 65535 can be conditionally redefined to uint32_t + * in the future if the need arises. + */ +typedef uint16_t qidx_t; +#define QIDX_INVALID 0xFFFF +/* + * Most cards can handle much larger TSO requests + * but the FreeBSD TCP stack will break on larger + * values + */ +#define FREEBSD_TSO_SIZE_MAX 65518 + + +struct iflib_ctx; +typedef struct iflib_ctx *if_ctx_t; +struct if_shared_ctx; +typedef struct if_shared_ctx *if_shared_ctx_t; +struct if_int_delay_info; +typedef struct if_int_delay_info *if_int_delay_info_t; + +/* + * File organization: + * - public structures + * - iflib accessors + * - iflib utility functions + * - iflib core functions + */ + +typedef struct if_rxd_frag { + uint8_t irf_flid; + qidx_t irf_idx; + uint16_t irf_len; +} *if_rxd_frag_t; + +typedef struct if_rxd_info { + /* set by iflib */ + uint16_t iri_qsidx; /* qset index */ + uint16_t iri_vtag; /* vlan tag - if flag set */ + /* XXX redundant with the new irf_len field */ + uint16_t iri_len; /* packet length */ + qidx_t iri_cidx; /* consumer index of cq */ + struct ifnet *iri_ifp; /* some drivers >1 interface per softc */ + + /* updated by driver */ + if_rxd_frag_t iri_frags; + uint32_t iri_flowid; /* RSS hash for packet */ + uint32_t iri_csum_flags; /* m_pkthdr 
csum flags */ + + uint32_t iri_csum_data; /* m_pkthdr csum data */ + uint8_t iri_flags; /* mbuf flags for packet */ + uint8_t iri_nfrags; /* number of fragments in packet */ + uint8_t iri_rsstype; /* RSS hash type */ + uint8_t iri_pad; /* any padding in the received data */ +} *if_rxd_info_t; + +typedef struct if_rxd_update { + uint64_t *iru_paddrs; + caddr_t *iru_vaddrs; + qidx_t *iru_idxs; + qidx_t iru_pidx; + uint16_t iru_qsidx; + uint16_t iru_count; + uint16_t iru_buf_size; + uint8_t iru_flidx; +} *if_rxd_update_t; + +#define IPI_TX_INTR 0x1 /* send an interrupt when this packet is sent */ +#define IPI_TX_IPV4 0x2 /* ethertype IPv4 */ +#define IPI_TX_IPV6 0x4 /* ethertype IPv6 */ + +typedef struct if_pkt_info { + bus_dma_segment_t *ipi_segs; /* physical addresses */ + uint32_t ipi_len; /* packet length */ + uint16_t ipi_qsidx; /* queue set index */ + qidx_t ipi_nsegs; /* number of segments */ + + qidx_t ipi_ndescs; /* number of descriptors used by encap */ + uint16_t ipi_flags; /* iflib per-packet flags */ + qidx_t ipi_pidx; /* start pidx for encap */ + qidx_t ipi_new_pidx; /* next available pidx post-encap */ + /* offload handling */ + uint8_t ipi_ehdrlen; /* ether header length */ + uint8_t ipi_ip_hlen; /* ip header length */ + uint8_t ipi_tcp_hlen; /* tcp header length */ + uint8_t ipi_ipproto; /* ip protocol */ + + uint32_t ipi_csum_flags; /* packet checksum flags */ + uint16_t ipi_tso_segsz; /* tso segment size */ + uint16_t ipi_vtag; /* VLAN tag */ + uint16_t ipi_etype; /* ether header type */ + uint8_t ipi_tcp_hflags; /* tcp header flags */ + uint8_t ipi_mflags; /* packet mbuf flags */ + + uint32_t ipi_tcp_seq; /* tcp seqno */ + uint32_t ipi_tcp_sum; /* tcp csum */ +} *if_pkt_info_t; + +typedef struct if_irq { + struct resource *ii_res; + int ii_rid; + void *ii_tag; +} *if_irq_t; + +struct if_int_delay_info { + if_ctx_t iidi_ctx; /* Back-pointer to the iflib ctx (softc) */ + int iidi_offset; /* Register offset to read/write */ + int iidi_value; /* 
Current value in usecs */ + struct sysctl_oid *iidi_oidp; + struct sysctl_req *iidi_req; +}; + +typedef enum { + IFLIB_INTR_LEGACY, + IFLIB_INTR_MSI, + IFLIB_INTR_MSIX +} iflib_intr_mode_t; + +/* + * This really belongs in pciio.h or some place more general + * but this is the only consumer for now. + */ +typedef struct pci_vendor_info { + uint32_t pvi_vendor_id; + uint32_t pvi_device_id; + uint32_t pvi_subvendor_id; + uint32_t pvi_subdevice_id; + uint32_t pvi_rev_id; + uint32_t pvi_class_mask; + caddr_t pvi_name; +} pci_vendor_info_t; + +#define PVID(vendor, devid, name) {vendor, devid, 0, 0, 0, 0, name} +#define PVID_OEM(vendor, devid, svid, sdevid, revid, name) {vendor, devid, svid, sdevid, revid, 0, name} +#define PVID_END {0, 0, 0, 0, 0, 0, NULL} + +typedef struct if_txrx { + int (*ift_txd_encap) (void *, if_pkt_info_t); + void (*ift_txd_flush) (void *, uint16_t, qidx_t pidx); + int (*ift_txd_credits_update) (void *, uint16_t qsidx, bool clear); + + int (*ift_rxd_available) (void *, uint16_t qsidx, qidx_t pidx, qidx_t budget); + int (*ift_rxd_pkt_get) (void *, if_rxd_info_t ri); + void (*ift_rxd_refill) (void * , if_rxd_update_t iru); + void (*ift_rxd_flush) (void *, uint16_t qsidx, uint8_t flidx, qidx_t pidx); + int (*ift_legacy_intr) (void *); +} *if_txrx_t; + +typedef struct if_softc_ctx { + int isc_vectors; + int isc_nrxqsets; + int isc_ntxqsets; + int isc_msix_bar; /* can be model specific - initialize in attach_pre */ + int isc_tx_nsegments; /* can be model specific - initialize in attach_pre */ + int isc_ntxd[8]; + int isc_nrxd[8]; + + uint32_t isc_txqsizes[8]; + uint32_t isc_rxqsizes[8]; + /* is there such thing as a descriptor that is more than 248 bytes ? 
*/ + uint8_t isc_txd_size[8]; + uint8_t isc_rxd_size[8]; + + int isc_max_txqsets; + int isc_max_rxqsets; + int isc_tx_tso_segments_max; + int isc_tx_tso_size_max; + int isc_tx_tso_segsize_max; + int isc_tx_csum_flags; + int isc_capenable; + int isc_rss_table_size; + int isc_rss_table_mask; + int isc_nrxqsets_max; + int isc_ntxqsets_max; + + iflib_intr_mode_t isc_intr; + uint16_t isc_max_frame_size; /* set at init time by driver */ + pci_vendor_info_t isc_vendor_info; /* set by iflib prior to attach_pre */ + if_txrx_t isc_txrx; +} *if_softc_ctx_t; + +/* + * Initialization values for device + */ +struct if_shared_ctx { + int isc_magic; + driver_t *isc_driver; + bus_size_t isc_q_align; + bus_size_t isc_tx_maxsize; + bus_size_t isc_tx_maxsegsize; + bus_size_t isc_rx_maxsize; + bus_size_t isc_rx_maxsegsize; + int isc_rx_nsegments; + int isc_admin_intrcnt; /* # of admin/link interrupts */ + + /* fields necessary for probe */ + pci_vendor_info_t *isc_vendor_info; + char *isc_driver_version; +/* optional function to transform the read values to match the table*/ + void (*isc_parse_devinfo) (uint16_t *device_id, uint16_t *subvendor_id, + uint16_t *subdevice_id, uint16_t *rev_id); + int isc_nrxd_min[8]; + int isc_nrxd_default[8]; + int isc_nrxd_max[8]; + int isc_ntxd_min[8]; + int isc_ntxd_default[8]; + int isc_ntxd_max[8]; + + /* actively used during operation */ + int isc_nfl __aligned(CACHE_LINE_SIZE); + int isc_ntxqs; /* # of tx queues per tx qset - usually 1 */ + int isc_nrxqs; /* # of rx queues per rx qset - intel 1, chelsio 2, broadcom 3 */ + int isc_rx_process_limit; + int isc_tx_reclaim_thresh; + int isc_flags; +}; + +typedef struct iflib_dma_info { + bus_addr_t idi_paddr; + caddr_t idi_vaddr; + bus_dma_tag_t idi_tag; + bus_dmamap_t idi_map; + uint32_t idi_size; +} *iflib_dma_info_t; + +#define IFLIB_MAGIC 0xCAFEF00D + +typedef enum { + IFLIB_INTR_RX, + IFLIB_INTR_TX, + IFLIB_INTR_RXTX, + IFLIB_INTR_ADMIN, + IFLIB_INTR_IOV, +} iflib_intr_type_t; + +#ifndef 
ETH_ADDR_LEN +#define ETH_ADDR_LEN 6 +#endif + + +/* + * Interface has a separate command queue for RX + */ +#define IFLIB_HAS_RXCQ 0x01 +/* + * Driver has already allocated vectors + */ +#define IFLIB_SKIP_MSIX 0x02 +/* + * Interface is a virtual function + */ +#define IFLIB_IS_VF 0x04 +/* + * Interface has a separate command queue for TX + */ +#define IFLIB_HAS_TXCQ 0x08 +/* + * Interface does checksum in place + */ +#define IFLIB_NEED_SCRATCH 0x10 +/* + * Interface doesn't expect in_pseudo for th_sum + */ +#define IFLIB_TSO_INIT_IP 0x20 +/* + * Interface doesn't align IP header + */ +#define IFLIB_DO_RX_FIXUP 0x40 + + + +/* + * field accessors + */ +void *iflib_get_softc(if_ctx_t ctx); + +device_t iflib_get_dev(if_ctx_t ctx); + +if_t iflib_get_ifp(if_ctx_t ctx); + +struct ifmedia *iflib_get_media(if_ctx_t ctx); + +if_softc_ctx_t iflib_get_softc_ctx(if_ctx_t ctx); +if_shared_ctx_t iflib_get_sctx(if_ctx_t ctx); + +void iflib_set_mac(if_ctx_t ctx, uint8_t mac[ETHER_ADDR_LEN]); + +/* + * If the driver can plug cleanly in to newbus use these + */ +int iflib_device_probe(device_t); +int iflib_device_attach(device_t); +int iflib_device_detach(device_t); +int iflib_device_suspend(device_t); +int iflib_device_resume(device_t); +int iflib_device_shutdown(device_t); + + +int iflib_device_iov_init(device_t, uint16_t, const nvlist_t *); +void iflib_device_iov_uninit(device_t); +int iflib_device_iov_add_vf(device_t, uint16_t, const nvlist_t *); + +/* + * If the driver can't plug cleanly in to newbus + * use these + */ +int iflib_device_register(device_t dev, void *softc, if_shared_ctx_t sctx, if_ctx_t *ctxp); +int iflib_device_deregister(if_ctx_t); + + + +int iflib_irq_alloc(if_ctx_t, if_irq_t, int, driver_filter_t, void *filter_arg, driver_intr_t, void *arg, char *name); +int iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid, + iflib_intr_type_t type, driver_filter_t *filter, + void *filter_arg, int qid, char *name); +void iflib_softirq_alloc_generic(if_ctx_t ctx, 
int rid, iflib_intr_type_t type, void *arg, int qid, char *name); + +void iflib_irq_free(if_ctx_t ctx, if_irq_t irq); + +void iflib_io_tqg_attach(struct grouptask *gt, void *uniq, int cpu, char *name); + +void iflib_config_gtask_init(if_ctx_t ctx, struct grouptask *gtask, + gtask_fn_t *fn, char *name); + +void iflib_config_gtask_deinit(struct grouptask *gtask); + + + +void iflib_tx_intr_deferred(if_ctx_t ctx, int txqid); +void iflib_rx_intr_deferred(if_ctx_t ctx, int rxqid); +void iflib_admin_intr_deferred(if_ctx_t ctx); +void iflib_iov_intr_deferred(if_ctx_t ctx); + + +void iflib_link_state_change(if_ctx_t ctx, int linkstate, uint64_t baudrate); + +int iflib_dma_alloc(if_ctx_t ctx, int size, iflib_dma_info_t dma, int mapflags); +void iflib_dma_free(iflib_dma_info_t dma); + +int iflib_dma_alloc_multi(if_ctx_t ctx, int *sizes, iflib_dma_info_t *dmalist, int mapflags, int count); + +void iflib_dma_free_multi(iflib_dma_info_t *dmalist, int count); + + +struct mtx *iflib_ctx_lock_get(if_ctx_t); +struct mtx *iflib_qset_lock_get(if_ctx_t, uint16_t); + +void iflib_led_create(if_ctx_t ctx); + +void iflib_add_int_delay_sysctl(if_ctx_t, const char *, const char *, + if_int_delay_info_t, int, int); + +#endif /* __IFLIB_H_ */ diff --git a/freebsd/sys/net/ifq.h b/freebsd/sys/net/ifq.h index f0d206d8..678b62f1 100644 --- a/freebsd/sys/net/ifq.h +++ b/freebsd/sys/net/ifq.h @@ -10,7 +10,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* diff --git a/freebsd/sys/net/netisr.c b/freebsd/sys/net/netisr.c index f14b2e95..b90ab80e 100644 --- a/freebsd/sys/net/netisr.c +++ b/freebsd/sys/net/netisr.c @@ -1322,9 +1322,7 @@ netisr_start_swi(u_int cpuid, struct pcpu *pc) static void netisr_init(void *arg) { -#ifdef EARLY_AP_STARTUP struct pcpu *pc; -#endif NETISR_LOCK_INIT(); if (netisr_maxthreads == 0 || netisr_maxthreads < -1 ) @@ -1363,7 +1361,8 @@ netisr_init(void *arg) } #else #ifndef __rtems__ - netisr_start_swi(curcpu, pcpu_find(curcpu)); + pc = get_pcpu(); + netisr_start_swi(pc->pc_cpuid, pc); #else /* __rtems__ */ netisr_start_swi(0, NULL); #endif /* __rtems__ */ diff --git a/freebsd/sys/net/pfil.c b/freebsd/sys/net/pfil.c index 8f4f50b8..54407f65 100644 --- a/freebsd/sys/net/pfil.c +++ b/freebsd/sys/net/pfil.c @@ -63,7 +63,32 @@ LIST_HEAD(pfilheadhead, pfil_head); VNET_DEFINE(struct pfilheadhead, pfil_head_list); #define V_pfil_head_list VNET(pfil_head_list) VNET_DEFINE(struct rmlock, pfil_lock); -#define V_pfil_lock VNET(pfil_lock) + +#define PFIL_LOCK_INIT_REAL(l, t) \ + rm_init_flags(l, "PFil " t " rmlock", RM_RECURSE) +#define PFIL_LOCK_DESTROY_REAL(l) \ + rm_destroy(l) +#define PFIL_LOCK_INIT(p) do { \ + if ((p)->flags & PFIL_FLAG_PRIVATE_LOCK) { \ + PFIL_LOCK_INIT_REAL(&(p)->ph_lock, "private"); \ + (p)->ph_plock = &(p)->ph_lock; \ + } else \ + (p)->ph_plock = &V_pfil_lock; \ +} while (0) +#define PFIL_LOCK_DESTROY(p) do { \ + if ((p)->flags & PFIL_FLAG_PRIVATE_LOCK) \ + PFIL_LOCK_DESTROY_REAL((p)->ph_plock); \ +} while (0) + +#define PFIL_TRY_RLOCK(p, t) rm_try_rlock((p)->ph_plock, (t)) +#define PFIL_RLOCK(p, t) rm_rlock((p)->ph_plock, (t)) +#define PFIL_WLOCK(p) rm_wlock((p)->ph_plock) +#define PFIL_RUNLOCK(p, t) rm_runlock((p)->ph_plock, (t)) +#define PFIL_WUNLOCK(p) rm_wunlock((p)->ph_plock) +#define PFIL_WOWNED(p) rm_wowned((p)->ph_plock) + +#define PFIL_HEADLIST_LOCK() mtx_lock(&pfil_global_lock) +#define PFIL_HEADLIST_UNLOCK() mtx_unlock(&pfil_global_lock) /* * pfil_run_hooks() runs 
the specified packet filter hook chain. diff --git a/freebsd/sys/net/pfil.h b/freebsd/sys/net/pfil.h index b78023b7..aee40e8e 100644 --- a/freebsd/sys/net/pfil.h +++ b/freebsd/sys/net/pfil.h @@ -38,6 +38,7 @@ #include #include #include +#include struct mbuf; struct ifnet; @@ -94,6 +95,9 @@ struct pfil_head { LIST_ENTRY(pfil_head) ph_list; }; +VNET_DECLARE(struct rmlock, pfil_lock); +#define V_pfil_lock VNET(pfil_lock) + /* Public functions for pfil hook management by packet filters. */ struct pfil_head *pfil_head_get(int, u_long); int pfil_add_hook(pfil_func_t, void *, int, struct pfil_head *); @@ -109,7 +113,6 @@ int pfil_head_register(struct pfil_head *); int pfil_head_unregister(struct pfil_head *); /* Public pfil locking functions for self managed locks by packet filters. */ -struct rm_priotracker; /* Do not require including rmlock header */ int pfil_try_rlock(struct pfil_head *, struct rm_priotracker *); void pfil_rlock(struct pfil_head *, struct rm_priotracker *); void pfil_runlock(struct pfil_head *, struct rm_priotracker *); @@ -117,32 +120,4 @@ void pfil_wlock(struct pfil_head *); void pfil_wunlock(struct pfil_head *); int pfil_wowned(struct pfil_head *ph); -/* Internal pfil locking functions. 
*/ -#define PFIL_LOCK_INIT_REAL(l, t) \ - rm_init_flags(l, "PFil " t " rmlock", RM_RECURSE) -#define PFIL_LOCK_DESTROY_REAL(l) \ - rm_destroy(l) -#define PFIL_LOCK_INIT(p) do { \ - if ((p)->flags & PFIL_FLAG_PRIVATE_LOCK) { \ - PFIL_LOCK_INIT_REAL(&(p)->ph_lock, "private"); \ - (p)->ph_plock = &(p)->ph_lock; \ - } else \ - (p)->ph_plock = &V_pfil_lock; \ -} while (0) -#define PFIL_LOCK_DESTROY(p) do { \ - if ((p)->flags & PFIL_FLAG_PRIVATE_LOCK) \ - PFIL_LOCK_DESTROY_REAL((p)->ph_plock); \ -} while (0) - -#define PFIL_TRY_RLOCK(p, t) rm_try_rlock((p)->ph_plock, (t)) -#define PFIL_RLOCK(p, t) rm_rlock((p)->ph_plock, (t)) -#define PFIL_WLOCK(p) rm_wlock((p)->ph_plock) -#define PFIL_RUNLOCK(p, t) rm_runlock((p)->ph_plock, (t)) -#define PFIL_WUNLOCK(p) rm_wunlock((p)->ph_plock) -#define PFIL_WOWNED(p) rm_wowned((p)->ph_plock) - -/* Internal locking macros for global/vnet pfil_head_list. */ -#define PFIL_HEADLIST_LOCK() mtx_lock(&pfil_global_lock) -#define PFIL_HEADLIST_UNLOCK() mtx_unlock(&pfil_global_lock) - #endif /* _NET_PFIL_H_ */ diff --git a/freebsd/sys/net/pfkeyv2.h b/freebsd/sys/net/pfkeyv2.h index 35348819..c2cf568f 100644 --- a/freebsd/sys/net/pfkeyv2.h +++ b/freebsd/sys/net/pfkeyv2.h @@ -223,9 +223,12 @@ struct sadb_x_policy { u_int16_t sadb_x_policy_exttype; u_int16_t sadb_x_policy_type; /* See policy type of ipsec.h */ u_int8_t sadb_x_policy_dir; /* direction, see ipsec.h */ - u_int8_t sadb_x_policy_reserved; + u_int8_t sadb_x_policy_scope; /* scope, see ipsec.h */ u_int32_t sadb_x_policy_id; u_int32_t sadb_x_policy_priority; +#define sadb_x_policy_reserved sadb_x_policy_scope +/* Policy with ifnet scope uses priority field to store ifindex */ +#define sadb_x_policy_ifindex sadb_x_policy_priority }; _Static_assert(sizeof(struct sadb_x_policy) == 16, "struct size mismatch"); @@ -320,7 +323,9 @@ _Static_assert(sizeof(struct sadb_x_sa_replay) == 8, "struct size mismatch"); #define SADB_X_EXT_NAT_T_OAR 24 /* Peer's NAT_OA for dst of SA. 
*/ #define SADB_X_EXT_NAT_T_FRAG 25 /* Manual MTU override. */ #define SADB_X_EXT_SA_REPLAY 26 /* Replay window override. */ -#define SADB_EXT_MAX 26 +#define SADB_X_EXT_NEW_ADDRESS_SRC 27 +#define SADB_X_EXT_NEW_ADDRESS_DST 28 +#define SADB_EXT_MAX 28 #define SADB_SATYPE_UNSPEC 0 #define SADB_SATYPE_AH 2 diff --git a/freebsd/sys/net/pfvar.h b/freebsd/sys/net/pfvar.h index 17768e96..cf0b3b2c 100644 --- a/freebsd/sys/net/pfvar.h +++ b/freebsd/sys/net/pfvar.h @@ -154,6 +154,8 @@ extern struct rwlock pf_rules_lock; #define PF_RULES_RASSERT() rw_assert(&pf_rules_lock, RA_RLOCKED) #define PF_RULES_WASSERT() rw_assert(&pf_rules_lock, RA_WLOCKED) +extern struct sx pf_end_lock; + #define PF_MODVER 1 #define PFLOG_MODVER 1 #define PFSYNC_MODVER 1 diff --git a/freebsd/sys/net/radix.c b/freebsd/sys/net/radix.c index 2615de65..35a5cfb4 100644 --- a/freebsd/sys/net/radix.c +++ b/freebsd/sys/net/radix.c @@ -12,7 +12,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/net/radix.h b/freebsd/sys/net/radix.h index 69aad831..f9c55164 100644 --- a/freebsd/sys/net/radix.h +++ b/freebsd/sys/net/radix.h @@ -10,7 +10,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/net/raw_cb.c b/freebsd/sys/net/raw_cb.c index 00a199f3..498495ac 100644 --- a/freebsd/sys/net/raw_cb.c +++ b/freebsd/sys/net/raw_cb.c @@ -13,7 +13,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/net/raw_cb.h b/freebsd/sys/net/raw_cb.h index 1b347e02..c2d002e6 100644 --- a/freebsd/sys/net/raw_cb.h +++ b/freebsd/sys/net/raw_cb.h @@ -11,7 +11,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/net/raw_usrreq.c b/freebsd/sys/net/raw_usrreq.c index 6e9668f3..96b610a0 100644 --- a/freebsd/sys/net/raw_usrreq.c +++ b/freebsd/sys/net/raw_usrreq.c @@ -13,7 +13,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/net/route.c b/freebsd/sys/net/route.c index a851efa8..a16617f8 100644 --- a/freebsd/sys/net/route.c +++ b/freebsd/sys/net/route.c @@ -12,7 +12,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/net/route.h b/freebsd/sys/net/route.h index d44dc9d5..93193b5f 100644 --- a/freebsd/sys/net/route.h +++ b/freebsd/sys/net/route.h @@ -10,7 +10,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* @@ -135,7 +135,7 @@ VNET_DECLARE(u_int, rt_add_addr_allfibs); /* Announce interfaces to all fibs */ #endif #endif -#if defined(_KERNEL) || defined(_WANT_RTENTRY) +#if defined(_KERNEL) struct rtentry { struct radix_node rt_nodes[2]; /* tree glue, and other values */ /* @@ -159,7 +159,7 @@ struct rtentry { struct mtx rt_mtx; /* mutex for routing entry */ struct rtentry *rt_chain; /* pointer to next rtentry to delete */ }; -#endif /* _KERNEL || _WANT_RTENTRY */ +#endif /* _KERNEL */ #define RTF_UP 0x1 /* route usable */ #define RTF_GATEWAY 0x2 /* destination is a gateway */ diff --git a/freebsd/sys/net/route_var.h b/freebsd/sys/net/route_var.h index 914bcfe2..f32dbc21 100644 --- a/freebsd/sys/net/route_var.h +++ b/freebsd/sys/net/route_var.h @@ -10,7 +10,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/net/rtsock.c b/freebsd/sys/net/rtsock.c index 97b92127..e9a31018 100644 --- a/freebsd/sys/net/rtsock.c +++ b/freebsd/sys/net/rtsock.c @@ -12,7 +12,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* diff --git a/freebsd/sys/net/slcompress.c b/freebsd/sys/net/slcompress.c index 4b5a9f97..db1c75eb 100644 --- a/freebsd/sys/net/slcompress.c +++ b/freebsd/sys/net/slcompress.c @@ -12,7 +12,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/net/slcompress.h b/freebsd/sys/net/slcompress.h index 794d8b83..bd53c081 100644 --- a/freebsd/sys/net/slcompress.h +++ b/freebsd/sys/net/slcompress.h @@ -13,7 +13,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* diff --git a/freebsd/sys/net80211/_ieee80211.h b/freebsd/sys/net80211/_ieee80211.h index 13155ea3..9434f3a6 100644 --- a/freebsd/sys/net80211/_ieee80211.h +++ b/freebsd/sys/net80211/_ieee80211.h @@ -313,6 +313,12 @@ struct ieee80211_channel { #define IEEE80211_IS_CHAN_VHT(_c) \ (((_c)->ic_flags & IEEE80211_CHAN_VHT) != 0) +#define IEEE80211_IS_CHAN_VHT_2GHZ(_c) \ + (IEEE80211_IS_CHAN_2GHZ(_c) && \ + ((_c)->ic_flags & IEEE80211_CHAN_VHT) != 0) +#define IEEE80211_IS_CHAN_VHT_5GHZ(_c) \ + (IEEE80211_IS_CHAN_5GHZ(_c) && \ + ((_c)->ic_flags & IEEE80211_CHAN_VHT) != 0) #define IEEE80211_IS_CHAN_VHT20(_c) \ (((_c)->ic_flags & IEEE80211_CHAN_VHT20) != 0) #define IEEE80211_IS_CHAN_VHT40(_c) \ @@ -437,17 +443,26 @@ struct ieee80211_regdomain { /* * MIMO antenna/radio state. */ - +#define IEEE80211_MAX_CHAINS 4 /* - * XXX This doesn't yet export both ctl/ext chain details - * XXX TODO: IEEE80211_MAX_CHAINS is defined in _freebsd.h, not here; - * figure out how to pull it in! + * This is the number of sub-channels for a channel. 
+ * 0 - pri20 + * 1 - sec20 (HT40, VHT40) + * 2 - sec40 (VHT80) + * 3 - sec80 (VHT80+80, VHT160) */ +#define IEEE80211_MAX_CHAIN_PRISEC 4 +#define IEEE80211_MAX_EVM_DWORDS 16 /* 16 pilots, 4 chains */ +#define IEEE80211_MAX_EVM_PILOTS 16 /* 468 subcarriers, 16 pilots */ + +struct ieee80211_mimo_chan_info { + int8_t rssi[IEEE80211_MAX_CHAIN_PRISEC]; + int8_t noise[IEEE80211_MAX_CHAIN_PRISEC]; +}; + struct ieee80211_mimo_info { - int8_t rssi[3]; /* per-antenna rssi */ - int8_t noise[3]; /* per-antenna noise floor */ - uint8_t pad[2]; - uint32_t evm[3]; /* EVM data */ + struct ieee80211_mimo_chan_info ch[IEEE80211_MAX_CHAINS]; + uint32_t evm[IEEE80211_MAX_EVM_DWORDS]; }; /* @@ -511,9 +526,100 @@ struct ieee80211_mimo_info { #define IEEE80211_HTC_RXMCS32 0x00400000 /* CAPABILITY: MCS32 support */ #define IEEE80211_HTC_TXUNEQUAL 0x00800000 /* CAPABILITY: TX unequal MCS */ #define IEEE80211_HTC_TXMCS32 0x01000000 /* CAPABILITY: MCS32 support */ +#define IEEE80211_HTC_TXLDPC 0x02000000 /* CAPABILITY: TX using LDPC */ #define IEEE80211_C_HTCAP_BITS \ "\20\1LDPC\2CHWIDTH40\5GREENFIELD\6SHORTGI20\7SHORTGI40\10TXSTBC" \ - "\21AMPDU\22AMSDU\23HT\24SMPS\25RIFS" + "\21AMPDU\22AMSDU\23HT\24SMPS\25RIFS\32TXLDPC" + +/* + * RX status notification - which fields are valid. 
+ */ +#define IEEE80211_R_NF 0x00000001 /* global NF value valid */ +#define IEEE80211_R_RSSI 0x00000002 /* global RSSI value valid */ +#define IEEE80211_R_C_CHAIN 0x00000004 /* RX chain count valid */ +#define IEEE80211_R_C_NF 0x00000008 /* per-chain NF value valid */ +#define IEEE80211_R_C_RSSI 0x00000010 /* per-chain RSSI value valid */ +#define IEEE80211_R_C_EVM 0x00000020 /* per-chain EVM valid */ +#define IEEE80211_R_C_HT40 0x00000040 /* RX'ed packet is 40mhz, pilots 4,5 valid */ +#define IEEE80211_R_FREQ 0x00000080 /* Freq value populated, MHz */ +#define IEEE80211_R_IEEE 0x00000100 /* IEEE value populated */ +#define IEEE80211_R_BAND 0x00000200 /* Frequency band populated */ +#define IEEE80211_R_TSF32 0x00004000 /* 32 bit TSF */ +#define IEEE80211_R_TSF64 0x00008000 /* 64 bit TSF */ +#define IEEE80211_R_TSF_START 0x00010000 /* TSF is sampled at start of frame */ +#define IEEE80211_R_TSF_END 0x00020000 /* TSF is sampled at end of frame */ + +/* + * RX status notification - describe the packet. 
+ */ +#define IEEE80211_RX_F_STBC 0x00000001 +#define IEEE80211_RX_F_LDPC 0x00000002 +#define IEEE80211_RX_F_AMSDU 0x00000004 /* This is the start of an decap AMSDU list */ +#define IEEE80211_RX_F_AMSDU_MORE 0x00000008 /* This is another decap AMSDU frame in the batch */ +#define IEEE80211_RX_F_AMPDU 0x00000010 /* This is the start of an decap AMPDU list */ +#define IEEE80211_RX_F_AMPDU_MORE 0x00000020 /* This is another decap AMPDU frame in the batch */ +#define IEEE80211_RX_F_FAIL_FCSCRC 0x00000040 /* Failed CRC/FCS */ +#define IEEE80211_RX_F_FAIL_MIC 0x00000080 /* Failed MIC check */ +#define IEEE80211_RX_F_DECRYPTED 0x00000100 /* Hardware decrypted */ +#define IEEE80211_RX_F_IV_STRIP 0x00000200 /* Decrypted; IV stripped */ +#define IEEE80211_RX_F_MMIC_STRIP 0x00000400 /* Decrypted; MMIC stripped */ +#define IEEE80211_RX_F_SHORTGI 0x00000800 /* This is a short-GI frame */ +#define IEEE80211_RX_F_CCK 0x00001000 +#define IEEE80211_RX_F_OFDM 0x00002000 +#define IEEE80211_RX_F_HT 0x00004000 +#define IEEE80211_RX_F_VHT 0x00008000 + +/* Channel width */ +#define IEEE80211_RX_FW_20MHZ 1 +#define IEEE80211_RX_FW_40MHZ 2 +#define IEEE80211_RX_FW_80MHZ 3 + +/* PHY type */ +#define IEEE80211_RX_FP_11B 1 +#define IEEE80211_RX_FP_11G 2 +#define IEEE80211_RX_FP_11A 3 +#define IEEE80211_RX_FP_11NA 4 +#define IEEE80211_RX_FP_11NG 5 + +struct ieee80211_rx_stats { + uint32_t r_flags; /* IEEE80211_R_* flags */ + uint32_t c_pktflags; /* IEEE80211_RX_F_* flags */ + + uint64_t c_rx_tsf; /* 32 or 64 bit TSF */ + + /* All DWORD aligned */ + int16_t c_nf_ctl[IEEE80211_MAX_CHAINS]; /* per-chain NF */ + int16_t c_nf_ext[IEEE80211_MAX_CHAINS]; /* per-chain NF */ + int16_t c_rssi_ctl[IEEE80211_MAX_CHAINS]; /* per-chain RSSI */ + int16_t c_rssi_ext[IEEE80211_MAX_CHAINS]; /* per-chain RSSI */ + + /* 32 bits */ + uint8_t c_nf; /* global NF */ + uint8_t c_rssi; /* global RSSI */ + uint8_t c_chain; /* number of RX chains involved */ + uint8_t c_rate; /* legacy; 11n rate code; VHT MCS */ + + /* 
32 bits */ + uint16_t c_freq; /* Frequency, MHz */ + uint8_t c_ieee; /* Channel */ + uint8_t c_width; /* channel width, FW flags above */ + + /* Force alignment to DWORD */ + union { + uint8_t evm[IEEE80211_MAX_CHAINS][IEEE80211_MAX_EVM_PILOTS]; + /* per-chain, per-pilot EVM values */ + uint32_t __aln[8]; + } evm; + + /* 32 bits */ + uint8_t c_phytype; /* PHY type, FW flags above */ + uint8_t c_vhtnss; /* VHT - number of spatial streams */ + uint8_t c_pad2[2]; +}; + +struct ieee80211_rx_params { + struct ieee80211_rx_stats params; +}; #endif /* _NET80211__IEEE80211_H_ */ diff --git a/freebsd/sys/net80211/ieee80211.c b/freebsd/sys/net80211/ieee80211.c index 7fcd3dcd..9e9be3d7 100644 --- a/freebsd/sys/net80211/ieee80211.c +++ b/freebsd/sys/net80211/ieee80211.c @@ -56,6 +56,7 @@ __FBSDID("$FreeBSD$"); #include #endif #include +#include #include @@ -72,6 +73,8 @@ const char *ieee80211_phymode_name[IEEE80211_MODE_MAX] = { [IEEE80211_MODE_QUARTER] = "quarter", [IEEE80211_MODE_11NA] = "11na", [IEEE80211_MODE_11NG] = "11ng", + [IEEE80211_MODE_VHT_2GHZ] = "11acg", + [IEEE80211_MODE_VHT_5GHZ] = "11ac", }; /* map ieee80211_opmode to the corresponding capability bit */ const int ieee80211_opcap[IEEE80211_OPMODE_MAX] = { @@ -119,6 +122,8 @@ static const struct ieee80211_rateset ieee80211_rateset_11g = { 12, { B(2), B(4), B(11), B(22), 12, 18, 24, 36, 48, 72, 96, 108 } }; #undef B +static int set_vht_extchan(struct ieee80211_channel *c); + /* * Fill in 802.11 available channel set, mark * all available channels as active, and pick @@ -150,10 +155,23 @@ ieee80211_chan_init(struct ieee80211com *ic) */ if (c->ic_ieee == 0) c->ic_ieee = ieee80211_mhz2ieee(c->ic_freq,c->ic_flags); + + /* + * Setup the HT40/VHT40 upper/lower bits. + * The VHT80 math is done elsewhere. + */ if (IEEE80211_IS_CHAN_HT40(c) && c->ic_extieee == 0) c->ic_extieee = ieee80211_mhz2ieee(c->ic_freq + (IEEE80211_IS_CHAN_HT40U(c) ? 
20 : -20), c->ic_flags); + + /* Update VHT math */ + /* + * XXX VHT again, note that this assumes VHT80 channels + * are legit already + */ + set_vht_extchan(c); + /* default max tx power to max regulatory */ if (c->ic_maxpower == 0) c->ic_maxpower = 2*c->ic_maxregpower; @@ -183,6 +201,10 @@ ieee80211_chan_init(struct ieee80211com *ic) setbit(ic->ic_modecaps, IEEE80211_MODE_11NA); if (IEEE80211_IS_CHAN_HTG(c)) setbit(ic->ic_modecaps, IEEE80211_MODE_11NG); + if (IEEE80211_IS_CHAN_VHTA(c)) + setbit(ic->ic_modecaps, IEEE80211_MODE_VHT_5GHZ); + if (IEEE80211_IS_CHAN_VHTG(c)) + setbit(ic->ic_modecaps, IEEE80211_MODE_VHT_2GHZ); } /* initialize candidate channels to all available */ memcpy(ic->ic_chan_active, ic->ic_chan_avail, @@ -210,6 +232,8 @@ ieee80211_chan_init(struct ieee80211com *ic) DEFAULTRATES(IEEE80211_MODE_QUARTER, ieee80211_rateset_quarter); DEFAULTRATES(IEEE80211_MODE_11NA, ieee80211_rateset_11a); DEFAULTRATES(IEEE80211_MODE_11NG, ieee80211_rateset_11g); + DEFAULTRATES(IEEE80211_MODE_VHT_2GHZ, ieee80211_rateset_11g); + DEFAULTRATES(IEEE80211_MODE_VHT_5GHZ, ieee80211_rateset_11a); /* * Setup required information to fill the mcsset field, if driver did @@ -220,6 +244,8 @@ ieee80211_chan_init(struct ieee80211com *ic) if (ic->ic_txstream == 0) ic->ic_txstream = 2; + ieee80211_init_suphtrates(ic); + /* * Set auto mode to reset active channel state and any desired channel. 
*/ @@ -337,6 +363,7 @@ ieee80211_ifattach(struct ieee80211com *ic) ieee80211_superg_attach(ic); #endif ieee80211_ht_attach(ic); + ieee80211_vht_attach(ic); ieee80211_scan_attach(ic); ieee80211_regdomain_attach(ic); ieee80211_dfs_attach(ic); @@ -380,6 +407,7 @@ ieee80211_ifdetach(struct ieee80211com *ic) #ifdef IEEE80211_SUPPORT_SUPERG ieee80211_superg_detach(ic); #endif + ieee80211_vht_detach(ic); ieee80211_ht_detach(ic); /* NB: must be called before ieee80211_node_detach */ ieee80211_proto_detach(ic); @@ -509,8 +537,15 @@ ieee80211_vap_setup(struct ieee80211com *ic, struct ieee80211vap *vap, vap->iv_flags_ext = ic->ic_flags_ext; vap->iv_flags_ven = ic->ic_flags_ven; vap->iv_caps = ic->ic_caps &~ IEEE80211_C_OPMODE; + + /* 11n capabilities - XXX methodize */ vap->iv_htcaps = ic->ic_htcaps; vap->iv_htextcaps = ic->ic_htextcaps; + + /* 11ac capabilities - XXX methodize */ + vap->iv_vhtcaps = ic->ic_vhtcaps; + vap->iv_vhtextcaps = ic->ic_vhtextcaps; + vap->iv_opmode = opmode; vap->iv_caps |= ieee80211_opcap[opmode]; IEEE80211_ADDR_COPY(vap->iv_myaddr, ic->ic_macaddr); @@ -595,6 +630,7 @@ ieee80211_vap_setup(struct ieee80211com *ic, struct ieee80211vap *vap, ieee80211_superg_vattach(vap); #endif ieee80211_ht_vattach(vap); + ieee80211_vht_vattach(vap); ieee80211_scan_vattach(vap); ieee80211_regdomain_vattach(vap); ieee80211_radiotap_vattach(vap); @@ -693,6 +729,8 @@ ieee80211_vap_detach(struct ieee80211vap *vap) */ ieee80211_draintask(ic, &vap->iv_nstate_task); ieee80211_draintask(ic, &vap->iv_swbmiss_task); + ieee80211_draintask(ic, &vap->iv_wme_task); + ieee80211_draintask(ic, &ic->ic_parent_task); /* XXX band-aid until ifnet handles this for us */ taskqueue_drain(taskqueue_swi, &ifp->if_linktask); @@ -731,6 +769,7 @@ ieee80211_vap_detach(struct ieee80211vap *vap) #ifdef IEEE80211_SUPPORT_SUPERG ieee80211_superg_vdetach(vap); #endif + ieee80211_vht_vdetach(vap); ieee80211_ht_vdetach(vap); /* NB: must be before ieee80211_node_vdetach */ ieee80211_proto_vdetach(vap); @@ 
-1075,6 +1114,110 @@ set_extchan(struct ieee80211_channel *c) c->ic_extieee = 0; } +/* + * Populate the freq1/freq2 fields as appropriate for VHT channels. + * + * This for now uses a hard-coded list of 80MHz wide channels. + * + * For HT20/HT40, freq1 just is the centre frequency of the 40MHz + * wide channel we've already decided upon. + * + * For VHT80 and VHT160, there are only a small number of fixed + * 80/160MHz wide channels, so we just use those. + * + * This is all likely very very wrong - both the regulatory code + * and this code needs to ensure that all four channels are + * available and valid before the VHT80 (and eight for VHT160) channel + * is created. + */ + +struct vht_chan_range { + uint16_t freq_start; + uint16_t freq_end; +}; + +struct vht_chan_range vht80_chan_ranges[] = { + { 5170, 5250 }, + { 5250, 5330 }, + { 5490, 5570 }, + { 5570, 5650 }, + { 5650, 5730 }, + { 5735, 5815 }, + { 0, 0, } +}; + +static int +set_vht_extchan(struct ieee80211_channel *c) +{ + int i; + + if (! 
IEEE80211_IS_CHAN_VHT(c)) { + return (0); + } + + if (IEEE80211_IS_CHAN_VHT20(c)) { + c->ic_vht_ch_freq1 = c->ic_ieee; + return (1); + } + + if (IEEE80211_IS_CHAN_VHT40(c)) { + if (IEEE80211_IS_CHAN_HT40U(c)) + c->ic_vht_ch_freq1 = c->ic_ieee + 2; + else if (IEEE80211_IS_CHAN_HT40D(c)) + c->ic_vht_ch_freq1 = c->ic_ieee - 2; + else + return (0); + return (1); + } + + if (IEEE80211_IS_CHAN_VHT80(c)) { + for (i = 0; vht80_chan_ranges[i].freq_start != 0; i++) { + if (c->ic_freq >= vht80_chan_ranges[i].freq_start && + c->ic_freq < vht80_chan_ranges[i].freq_end) { + int midpoint; + + midpoint = vht80_chan_ranges[i].freq_start + 40; + c->ic_vht_ch_freq1 = + ieee80211_mhz2ieee(midpoint, c->ic_flags); + c->ic_vht_ch_freq2 = 0; +#if 0 + printf("%s: %d, freq=%d, midpoint=%d, freq1=%d, freq2=%d\n", + __func__, c->ic_ieee, c->ic_freq, midpoint, + c->ic_vht_ch_freq1, c->ic_vht_ch_freq2); +#endif + return (1); + } + } + return (0); + } + + printf("%s: unknown VHT channel type (ieee=%d, flags=0x%08x)\n", + __func__, + c->ic_ieee, + c->ic_flags); + + return (0); +} + +/* + * Return whether the current channel could possibly be a part of + * a VHT80 channel. + * + * This doesn't check that the whole range is in the allowed list + * according to regulatory. + */ +static int +is_vht80_valid_freq(uint16_t freq) +{ + int i; + for (i = 0; vht80_chan_ranges[i].freq_start != 0; i++) { + if (freq >= vht80_chan_ranges[i].freq_start && + freq < vht80_chan_ranges[i].freq_end) + return (1); + } + return (0); +} + static int addchan(struct ieee80211_channel chans[], int maxchans, int *nchans, uint8_t ieee, uint16_t freq, int8_t maxregpower, uint32_t flags) @@ -1084,13 +1227,25 @@ addchan(struct ieee80211_channel chans[], int maxchans, int *nchans, if (*nchans >= maxchans) return (ENOBUFS); +#if 0 + printf("%s: %d: ieee=%d, freq=%d, flags=0x%08x\n", + __func__, + *nchans, + ieee, + freq, + flags); +#endif + c = &chans[(*nchans)++]; c->ic_ieee = ieee; c->ic_freq = freq != 0 ? 
freq : ieee80211_ieee2mhz(ieee, flags); c->ic_maxregpower = maxregpower; c->ic_maxpower = 2 * maxregpower; c->ic_flags = flags; + c->ic_vht_ch_freq1 = 0; + c->ic_vht_ch_freq2 = 0; set_extchan(c); + set_vht_extchan(c); return (0); } @@ -1106,14 +1261,27 @@ copychan_prev(struct ieee80211_channel chans[], int maxchans, int *nchans, if (*nchans >= maxchans) return (ENOBUFS); +#if 0 + printf("%s: %d: flags=0x%08x\n", + __func__, + *nchans, + flags); +#endif + c = &chans[(*nchans)++]; c[0] = c[-1]; c->ic_flags = flags; + c->ic_vht_ch_freq1 = 0; + c->ic_vht_ch_freq2 = 0; set_extchan(c); + set_vht_extchan(c); return (0); } +/* + * XXX VHT-2GHz + */ static void getflags_2ghz(const uint8_t bands[], uint32_t flags[], int ht40) { @@ -1134,35 +1302,73 @@ getflags_2ghz(const uint8_t bands[], uint32_t flags[], int ht40) } static void -getflags_5ghz(const uint8_t bands[], uint32_t flags[], int ht40) +getflags_5ghz(const uint8_t bands[], uint32_t flags[], int ht40, int vht80) { int nmodes; + /* + * the addchan_list function seems to expect the flags array to + * be in channel width order, so the VHT bits are interspersed + * as appropriate to maintain said order. + * + * It also assumes HT40U is before HT40D. 
+ */ nmodes = 0; + + /* 20MHz */ if (isset(bands, IEEE80211_MODE_11A)) flags[nmodes++] = IEEE80211_CHAN_A; if (isset(bands, IEEE80211_MODE_11NA)) flags[nmodes++] = IEEE80211_CHAN_A | IEEE80211_CHAN_HT20; + if (isset(bands, IEEE80211_MODE_VHT_5GHZ)) { + flags[nmodes++] = IEEE80211_CHAN_A | IEEE80211_CHAN_HT20 | + IEEE80211_CHAN_VHT20; + } + + /* 40MHz */ if (ht40) { flags[nmodes++] = IEEE80211_CHAN_A | IEEE80211_CHAN_HT40U; + } + if (ht40 && isset(bands, IEEE80211_MODE_VHT_5GHZ)) { + flags[nmodes++] = IEEE80211_CHAN_A | IEEE80211_CHAN_HT40U + | IEEE80211_CHAN_VHT40U; + } + if (ht40) { flags[nmodes++] = IEEE80211_CHAN_A | IEEE80211_CHAN_HT40D; } + if (ht40 && isset(bands, IEEE80211_MODE_VHT_5GHZ)) { + flags[nmodes++] = IEEE80211_CHAN_A | IEEE80211_CHAN_HT40D + | IEEE80211_CHAN_VHT40D; + } + + /* 80MHz */ + if (vht80 && isset(bands, IEEE80211_MODE_VHT_5GHZ)) { + flags[nmodes++] = IEEE80211_CHAN_A | + IEEE80211_CHAN_HT40U | IEEE80211_CHAN_VHT80; + flags[nmodes++] = IEEE80211_CHAN_A | + IEEE80211_CHAN_HT40D | IEEE80211_CHAN_VHT80; + } + + /* XXX VHT80+80 */ + /* XXX VHT160 */ flags[nmodes] = 0; } static void -getflags(const uint8_t bands[], uint32_t flags[], int ht40) +getflags(const uint8_t bands[], uint32_t flags[], int ht40, int vht80) { flags[0] = 0; if (isset(bands, IEEE80211_MODE_11A) || - isset(bands, IEEE80211_MODE_11NA)) { + isset(bands, IEEE80211_MODE_11NA) || + isset(bands, IEEE80211_MODE_VHT_5GHZ)) { if (isset(bands, IEEE80211_MODE_11B) || isset(bands, IEEE80211_MODE_11G) || - isset(bands, IEEE80211_MODE_11NG)) + isset(bands, IEEE80211_MODE_11NG) || + isset(bands, IEEE80211_MODE_VHT_2GHZ)) return; - getflags_5ghz(bands, flags, ht40); + getflags_5ghz(bands, flags, ht40, vht80); } else getflags_2ghz(bands, flags, ht40); } @@ -1170,6 +1376,7 @@ getflags(const uint8_t bands[], uint32_t flags[], int ht40) /* * Add one 20 MHz channel into specified channel list. 
*/ +/* XXX VHT */ int ieee80211_add_channel(struct ieee80211_channel chans[], int maxchans, int *nchans, uint8_t ieee, uint16_t freq, int8_t maxregpower, @@ -1178,7 +1385,7 @@ ieee80211_add_channel(struct ieee80211_channel chans[], int maxchans, uint32_t flags[IEEE80211_MODE_MAX]; int i, error; - getflags(bands, flags, 0); + getflags(bands, flags, 0, 0); KASSERT(flags[0] != 0, ("%s: no correct mode provided\n", __func__)); error = addchan(chans, maxchans, nchans, ieee, freq, maxregpower, @@ -1212,6 +1419,7 @@ findchannel(struct ieee80211_channel chans[], int nchans, uint16_t freq, /* * Add 40 MHz channel pair into specified channel list. */ +/* XXX VHT */ int ieee80211_add_channel_ht40(struct ieee80211_channel chans[], int maxchans, int *nchans, uint8_t ieee, int8_t maxregpower, uint32_t flags) @@ -1269,11 +1477,17 @@ ieee80211_get_channel_center_freq(const struct ieee80211_channel *c) * For 80+80MHz channels this will be the centre of the primary * 80MHz channel; the secondary 80MHz channel will be center_freq2(). */ - uint32_t ieee80211_get_channel_center_freq1(const struct ieee80211_channel *c) { + /* + * VHT - use the pre-calculated centre frequency + * of the given channel. + */ + if (IEEE80211_IS_CHAN_VHT(c)) + return (ieee80211_ieee2mhz(c->ic_vht_ch_freq1, c->ic_flags)); + if (IEEE80211_IS_CHAN_HT40U(c)) { return (c->ic_freq + 10); } @@ -1285,12 +1499,15 @@ ieee80211_get_channel_center_freq1(const struct ieee80211_channel *c) } /* - * For now, no 80+80 support; this is zero. + * For now, no 80+80 support; it will likely always return 0. 
*/ uint32_t ieee80211_get_channel_center_freq2(const struct ieee80211_channel *c) { + if (IEEE80211_IS_CHAN_VHT(c) && (c->ic_vht_ch_freq2 != 0)) + return (ieee80211_ieee2mhz(c->ic_vht_ch_freq2, c->ic_flags)); + return (0); } @@ -1304,16 +1521,70 @@ add_chanlist(struct ieee80211_channel chans[], int maxchans, int *nchans, { uint16_t freq; int i, j, error; + int is_vht; for (i = 0; i < nieee; i++) { freq = ieee80211_ieee2mhz(ieee[i], flags[0]); for (j = 0; flags[j] != 0; j++) { + /* + * Notes: + * + HT40 and VHT40 channels occur together, so + * we need to be careful that we actually allow that. + * + VHT80, VHT160 will coexist with HT40/VHT40, so + * make sure it's not skipped because of the overlap + * check used for (V)HT40. + */ + is_vht = !! (flags[j] & IEEE80211_CHAN_VHT); + + /* + * Test for VHT80. + * XXX This is all very broken right now. + * What we /should/ do is: + * + * + check that the frequency is in the list of + * allowed VHT80 ranges; and + * + the other 3 channels in the list are actually + * also available. + */ + if (is_vht && flags[j] & IEEE80211_CHAN_VHT80) + if (! is_vht80_valid_freq(freq)) + continue; + + /* + * Test for (V)HT40. + * + * This is also a fall through from VHT80; as we only + * allow a VHT80 channel if the VHT40 combination is + * also valid. If the VHT40 form is not valid then + * we certainly can't do VHT80.. + */ if (flags[j] & IEEE80211_CHAN_HT40D) + /* + * Can't have a "lower" channel if we are the + * first channel. + * + * Can't have a "lower" channel if it's below/ + * within 20MHz of the first channel. + * + * Can't have a "lower" channel if the channel + * below it is not 20MHz away. + */ if (i == 0 || ieee[i] < ieee[0] + 4 || freq - 20 != ieee80211_ieee2mhz(ieee[i] - 4, flags[j])) continue; if (flags[j] & IEEE80211_CHAN_HT40U) + /* + * Can't have an "upper" channel if we are + * the last channel. + * + * Can't have an "upper" channel be above the + * last channel in the list. 
+ * + * Can't have an "upper" channel if the next + * channel according to the math isn't 20MHz + * away. (Likely for channel 13/14.) + */ if (i == nieee - 1 || ieee[i] + 4 > ieee[nieee - 1] || freq + 20 != @@ -1342,6 +1613,7 @@ ieee80211_add_channel_list_2ghz(struct ieee80211_channel chans[], int maxchans, { uint32_t flags[IEEE80211_MODE_MAX]; + /* XXX no VHT for now */ getflags_2ghz(bands, flags, ht40); KASSERT(flags[0] != 0, ("%s: no correct mode provided\n", __func__)); @@ -1354,8 +1626,15 @@ ieee80211_add_channel_list_5ghz(struct ieee80211_channel chans[], int maxchans, int ht40) { uint32_t flags[IEEE80211_MODE_MAX]; + int vht80 = 0; + + /* + * For now, assume VHT == VHT80 support as a minimum. + */ + if (isset(bands, IEEE80211_MODE_VHT_5GHZ)) + vht80 = 1; - getflags_5ghz(bands, flags, ht40); + getflags_5ghz(bands, flags, ht40, vht80); KASSERT(flags[0] != 0, ("%s: no correct mode provided\n", __func__)); return (add_chanlist(chans, maxchans, nchans, ieee, nieee, flags)); @@ -1494,6 +1773,8 @@ addmedia(struct ifmedia *media, int caps, int addsta, int mode, int mword) [IEEE80211_MODE_QUARTER] = IFM_IEEE80211_11A, /* XXX */ [IEEE80211_MODE_11NA] = IFM_IEEE80211_11NA, [IEEE80211_MODE_11NG] = IFM_IEEE80211_11NG, + [IEEE80211_MODE_VHT_2GHZ] = IFM_IEEE80211_VHT2G, + [IEEE80211_MODE_VHT_5GHZ] = IFM_IEEE80211_VHT5G, }; u_int mopt; @@ -1606,6 +1887,19 @@ ieee80211_media_setup(struct ieee80211com *ic, if (rate > maxrate) maxrate = rate; } + + /* + * Add VHT media. + */ + for (; mode <= IEEE80211_MODE_VHT_5GHZ; mode++) { + if (isclr(ic->ic_modecaps, mode)) + continue; + addmedia(media, caps, addsta, mode, IFM_AUTO); + addmedia(media, caps, addsta, mode, IFM_IEEE80211_VHT); + + /* XXX TODO: VHT maxrate */ + } + return maxrate; } @@ -1617,6 +1911,14 @@ ieee80211_get_suprates(struct ieee80211com *ic, const struct ieee80211_channel * return &ic->ic_sup_rates[ieee80211_chan2mode(c)]; } +/* XXX inline or eliminate? 
*/ +const struct ieee80211_htrateset * +ieee80211_get_suphtrates(struct ieee80211com *ic, + const struct ieee80211_channel *c) +{ + return &ic->ic_sup_htrates; +} + void ieee80211_announce(struct ieee80211com *ic) { @@ -1641,6 +1943,7 @@ ieee80211_announce(struct ieee80211com *ic) printf("\n"); } ieee80211_ht_announce(ic); + ieee80211_vht_announce(ic); } void @@ -1885,7 +2188,11 @@ enum ieee80211_phymode ieee80211_chan2mode(const struct ieee80211_channel *chan) { - if (IEEE80211_IS_CHAN_HTA(chan)) + if (IEEE80211_IS_CHAN_VHT_2GHZ(chan)) + return IEEE80211_MODE_VHT_2GHZ; + else if (IEEE80211_IS_CHAN_VHT_5GHZ(chan)) + return IEEE80211_MODE_VHT_5GHZ; + else if (IEEE80211_IS_CHAN_HTA(chan)) return IEEE80211_MODE_11NA; else if (IEEE80211_IS_CHAN_HTG(chan)) return IEEE80211_MODE_11NG; diff --git a/freebsd/sys/net80211/ieee80211.h b/freebsd/sys/net80211/ieee80211.h index aa2ddb09..9fef8c44 100644 --- a/freebsd/sys/net80211/ieee80211.h +++ b/freebsd/sys/net80211/ieee80211.h @@ -165,6 +165,12 @@ struct ieee80211_qosframe_addr4 { #define IEEE80211_IS_MGMT(wh) \ (!! (((wh)->i_fc[0] & IEEE80211_FC0_TYPE_MASK) \ == IEEE80211_FC0_TYPE_MGT)) +#define IEEE80211_IS_CTL(wh) \ + (!! (((wh)->i_fc[0] & IEEE80211_FC0_TYPE_MASK) \ + == IEEE80211_FC0_TYPE_CTL)) +#define IEEE80211_IS_DATA(wh) \ + (!! 
(((wh)->i_fc[0] & IEEE80211_FC0_TYPE_MASK) \ + == IEEE80211_FC0_TYPE_DATA)) #define IEEE80211_FC0_QOSDATA \ (IEEE80211_FC0_TYPE_DATA|IEEE80211_FC0_SUBTYPE_QOS|IEEE80211_FC0_VERSION_0) @@ -611,7 +617,7 @@ struct ieee80211_ie_htcap { } __packed; /* HT capability flags (ht_cap) */ -#define IEEE80211_HTCAP_LDPC 0x0001 /* LDPC supported */ +#define IEEE80211_HTCAP_LDPC 0x0001 /* LDPC rx supported */ #define IEEE80211_HTCAP_CHWIDTH40 0x0002 /* 20/40 supported */ #define IEEE80211_HTCAP_SMPS 0x000c /* SM Power Save mode */ #define IEEE80211_HTCAP_SMPS_OFF 0x000c /* disabled */ @@ -798,37 +804,73 @@ struct ieee80211_ie_vht_operation { #define IEEE80211_VHTCAP_MAX_MPDU_LENGTH_7991 0x00000001 #define IEEE80211_VHTCAP_MAX_MPDU_LENGTH_11454 0x00000002 #define IEEE80211_VHTCAP_MAX_MPDU_MASK 0x00000003 -#define IEEE80211_VHTCAP_SUPP_CHAN_WIDTH_160MHZ 0x00000004 -#define IEEE80211_VHTCAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ 0x00000008 +#define IEEE80211_VHTCAP_MAX_MPDU_MASK_S 0 + #define IEEE80211_VHTCAP_SUPP_CHAN_WIDTH_MASK 0x0000000C +#define IEEE80211_VHTCAP_SUPP_CHAN_WIDTH_MASK_S 2 +#define IEEE80211_VHTCAP_SUPP_CHAN_WIDTH_NONE 0 +#define IEEE80211_VHTCAP_SUPP_CHAN_WIDTH_160MHZ 1 +#define IEEE80211_VHTCAP_SUPP_CHAN_WIDTH_160_80P80MHZ 2 +#define IEEE80211_VHTCAP_SUPP_CHAN_WIDTH_RESERVED 3 + #define IEEE80211_VHTCAP_RXLDPC 0x00000010 +#define IEEE80211_VHTCAP_RXLDPC_S 4 + #define IEEE80211_VHTCAP_SHORT_GI_80 0x00000020 +#define IEEE80211_VHTCAP_SHORT_GI_80_S 5 + #define IEEE80211_VHTCAP_SHORT_GI_160 0x00000040 +#define IEEE80211_VHTCAP_SHORT_GI_160_S 6 + #define IEEE80211_VHTCAP_TXSTBC 0x00000080 +#define IEEE80211_VHTCAP_TXSTBC_S 7 + #define IEEE80211_VHTCAP_RXSTBC_1 0x00000100 #define IEEE80211_VHTCAP_RXSTBC_2 0x00000200 #define IEEE80211_VHTCAP_RXSTBC_3 0x00000300 #define IEEE80211_VHTCAP_RXSTBC_4 0x00000400 #define IEEE80211_VHTCAP_RXSTBC_MASK 0x00000700 +#define IEEE80211_VHTCAP_RXSTBC_MASK_S 8 + #define IEEE80211_VHTCAP_SU_BEAMFORMER_CAPABLE 0x00000800 +#define 
IEEE80211_VHTCAP_SU_BEAMFORMER_CAPABLE_S 11 + #define IEEE80211_VHTCAP_SU_BEAMFORMEE_CAPABLE 0x00001000 +#define IEEE80211_VHTCAP_SU_BEAMFORMEE_CAPABLE_S 12 + #define IEEE80211_VHTCAP_BEAMFORMEE_STS_SHIFT 13 #define IEEE80211_VHTCAP_BEAMFORMEE_STS_MASK \ (7 << IEEE80211_VHTCAP_BEAMFORMEE_STS_SHIFT) +#define IEEE80211_VHTCAP_BEAMFORMEE_STS_MASK_S 13 + #define IEEE80211_VHTCAP_SOUNDING_DIMENSIONS_SHIFT 16 #define IEEE80211_VHTCAP_SOUNDING_DIMENSIONS_MASK \ (7 << IEEE80211_VHTCAP_SOUNDING_DIMENSIONS_SHIFT) +#define IEEE80211_VHTCAP_SOUNDING_DIMENSIONS_MASK_S 16 + #define IEEE80211_VHTCAP_MU_BEAMFORMER_CAPABLE 0x00080000 +#define IEEE80211_VHTCAP_MU_BEAMFORMER_CAPABLE_S 19 #define IEEE80211_VHTCAP_MU_BEAMFORMEE_CAPABLE 0x00100000 +#define IEEE80211_VHTCAP_MU_BEAMFORMEE_CAPABLE_S 20 #define IEEE80211_VHTCAP_VHT_TXOP_PS 0x00200000 +#define IEEE80211_VHTCAP_VHT_TXOP_PS_S 21 #define IEEE80211_VHTCAP_HTC_VHT 0x00400000 +#define IEEE80211_VHTCAP_HTC_VHT_S 22 + #define IEEE80211_VHTCAP_MAX_A_MPDU_LENGTH_EXPONENT_SHIFT 23 #define IEEE80211_VHTCAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK \ (7 << IEEE80211_VHTCAP_MAX_A_MPDU_LENGTH_EXPONENT_SHIFT) +#define IEEE80211_VHTCAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK_S 23 + +#define IEEE80211_VHTCAP_VHT_LINK_ADAPTATION_VHT_MASK 0x0c000000 #define IEEE80211_VHTCAP_VHT_LINK_ADAPTATION_VHT_UNSOL_MFB 0x08000000 #define IEEE80211_VHTCAP_VHT_LINK_ADAPTATION_VHT_MRQ_MFB 0x0c000000 +#define IEEE80211_VHTCAP_VHT_LINK_ADAPTATION_VHT_MASK_S 26 + #define IEEE80211_VHTCAP_RX_ANTENNA_PATTERN 0x10000000 +#define IEEE80211_VHTCAP_RX_ANTENNA_PATTERN_S 28 #define IEEE80211_VHTCAP_TX_ANTENNA_PATTERN 0x20000000 +#define IEEE80211_VHTCAP_TX_ANTENNA_PATTERN_S 29 /* * XXX TODO: add the rest of the bits diff --git a/freebsd/sys/net80211/ieee80211_adhoc.c b/freebsd/sys/net80211/ieee80211_adhoc.c index 834c84cb..15a037f6 100644 --- a/freebsd/sys/net80211/ieee80211_adhoc.c +++ b/freebsd/sys/net80211/ieee80211_adhoc.c @@ -824,10 +824,14 @@ adhoc_recv_mgmt(struct ieee80211_node 
*ni, struct mbuf *m0, #if 0 if (scan.htcap != NULL && scan.htinfo != NULL && (vap->iv_flags_ht & IEEE80211_FHT_HT)) { - if (ieee80211_ht_updateparams(ni, + ieee80211_ht_updateparams(ni, + scan.htcap, scan.htinfo)); + if (ieee80211_ht_updateparams_final(ni, scan.htcap, scan.htinfo)) ht_state_change = 1; } + + /* XXX same for VHT? */ #endif if (ni != NULL) { IEEE80211_RSSI_LPF(ni->ni_avgrssi, rssi); diff --git a/freebsd/sys/net80211/ieee80211_freebsd.c b/freebsd/sys/net80211/ieee80211_freebsd.c index 8c90c2f3..61c0b81d 100644 --- a/freebsd/sys/net80211/ieee80211_freebsd.c +++ b/freebsd/sys/net80211/ieee80211_freebsd.c @@ -182,6 +182,26 @@ ieee80211_sysctl_radar(SYSCTL_HANDLER_ARGS) return 0; } +/* + * For now, just restart everything. + * + * Later on, it'd be nice to have a separate VAP restart to + * full-device restart. + */ +static int +ieee80211_sysctl_vap_restart(SYSCTL_HANDLER_ARGS) +{ + struct ieee80211vap *vap = arg1; + int t = 0, error; + + error = sysctl_handle_int(oidp, &t, 0, req); + if (error || !req->newptr) + return error; + + ieee80211_restart_all(vap->iv_ic); + return 0; +} + void ieee80211_sysctl_attach(struct ieee80211com *ic) { @@ -261,6 +281,12 @@ ieee80211_sysctl_vattach(struct ieee80211vap *vap) &vap->iv_ampdu_mintraffic[WME_AC_VI], 0, "VI traffic tx aggr threshold (pps)"); } + + SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, + "force_restart", CTLTYPE_INT | CTLFLAG_RW, vap, 0, + ieee80211_sysctl_vap_restart, "I", + "force a VAP restart"); + if (vap->iv_caps & IEEE80211_C_DFS) { SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "radar", CTLTYPE_INT | CTLFLAG_RW, vap->iv_ic, 0, diff --git a/freebsd/sys/net80211/ieee80211_freebsd.h b/freebsd/sys/net80211/ieee80211_freebsd.h index 49549e7b..01b9a8f6 100644 --- a/freebsd/sys/net80211/ieee80211_freebsd.h +++ b/freebsd/sys/net80211/ieee80211_freebsd.h @@ -622,98 +622,9 @@ int ieee80211_add_xmit_params(struct mbuf *m, int ieee80211_get_xmit_params(struct mbuf *m, struct ieee80211_bpf_params 
*); -/* - * Note: this is fine for 3x3 (and 4x4) 11n HT40; - * but getting EVM information for VHT80, VHT160 - * will involve more than 6 EVM pilots. - */ -#define IEEE80211_MAX_CHAINS 4 -#define IEEE80211_MAX_EVM_PILOTS 6 - -#define IEEE80211_R_NF 0x00000001 /* global NF value valid */ -#define IEEE80211_R_RSSI 0x00000002 /* global RSSI value valid */ -#define IEEE80211_R_C_CHAIN 0x00000004 /* RX chain count valid */ -#define IEEE80211_R_C_NF 0x00000008 /* per-chain NF value valid */ -#define IEEE80211_R_C_RSSI 0x00000010 /* per-chain RSSI value valid */ -#define IEEE80211_R_C_EVM 0x00000020 /* per-chain EVM valid */ -#define IEEE80211_R_C_HT40 0x00000040 /* RX'ed packet is 40mhz, pilots 4,5 valid */ -#define IEEE80211_R_FREQ 0x00000080 /* Freq value populated, MHz */ -#define IEEE80211_R_IEEE 0x00000100 /* IEEE value populated */ -#define IEEE80211_R_BAND 0x00000200 /* Frequency band populated */ -#define IEEE80211_R_TSF32 0x00004000 /* 32 bit TSF */ -#define IEEE80211_R_TSF64 0x00008000 /* 64 bit TSF */ -#define IEEE80211_R_TSF_START 0x00010000 /* TSF is sampled at start of frame */ -#define IEEE80211_R_TSF_END 0x00020000 /* TSF is sampled at end of frame */ - -/* RX packet flags - describe the kind of frame */ -#define IEEE80211_RX_F_STBC 0x00000001 -#define IEEE80211_RX_F_LDPC 0x00000002 -#define IEEE80211_RX_F_AMSDU 0x00000004 /* This is the start of an decap AMSDU list */ -#define IEEE80211_RX_F_AMSDU_MORE 0x00000008 /* This is another decap AMSDU frame in the batch */ -#define IEEE80211_RX_F_AMPDU 0x00000010 /* This is the start of an decap AMPDU list */ -#define IEEE80211_RX_F_AMPDU_MORE 0x00000020 /* This is another decap AMPDU frame in the batch */ -#define IEEE80211_RX_F_FAIL_FCSCRC 0x00000040 /* Failed CRC/FCS */ -#define IEEE80211_RX_F_FAIL_MIC 0x00000080 /* Failed MIC check */ -#define IEEE80211_RX_F_DECRYPTED 0x00000100 /* Hardware decrypted */ -#define IEEE80211_RX_F_IV_STRIP 0x00000200 /* Decrypted; IV stripped */ -#define 
IEEE80211_RX_F_MMIC_STRIP 0x00000400 /* Decrypted; MMIC stripped */ -#define IEEE80211_RX_F_SHORTGI 0x00000800 /* This is a short-GI frame */ -#define IEEE80211_RX_F_CCK 0x00001000 -#define IEEE80211_RX_F_OFDM 0x00002000 -#define IEEE80211_RX_F_HT 0x00004000 -#define IEEE80211_RX_F_VHT 0x00008000 - -/* Channel width */ -#define IEEE80211_RX_FW_20MHZ 1 -#define IEEE80211_RX_FW_40MHZ 2 -#define IEEE80211_RX_FW_80MHZ 3 - -/* PHY type */ -#define IEEE80211_RX_FP_11B 1 -#define IEEE80211_RX_FP_11G 2 -#define IEEE80211_RX_FP_11A 3 -#define IEEE80211_RX_FP_11NA 4 -#define IEEE80211_RX_FP_11NG 5 - -struct ieee80211_rx_stats { - uint32_t r_flags; /* IEEE80211_R_* flags */ - uint32_t c_pktflags; /* IEEE80211_RX_F_* flags */ - - uint64_t c_rx_tsf; /* 32 or 64 bit TSF */ - - /* All DWORD aligned */ - int16_t c_nf_ctl[IEEE80211_MAX_CHAINS]; /* per-chain NF */ - int16_t c_nf_ext[IEEE80211_MAX_CHAINS]; /* per-chain NF */ - int16_t c_rssi_ctl[IEEE80211_MAX_CHAINS]; /* per-chain RSSI */ - int16_t c_rssi_ext[IEEE80211_MAX_CHAINS]; /* per-chain RSSI */ - - /* 32 bits */ - uint8_t c_nf; /* global NF */ - uint8_t c_rssi; /* global RSSI */ - uint8_t c_chain; /* number of RX chains involved */ - uint8_t c_rate; /* legacy; 11n rate code; VHT MCS */ - - /* 32 bits */ - uint16_t c_freq; /* Frequency, MHz */ - uint8_t c_ieee; /* Channel */ - uint8_t c_width; /* channel width, FW flags above */ - - /* Force alignment to DWORD */ - union { - uint8_t evm[IEEE80211_MAX_CHAINS][IEEE80211_MAX_EVM_PILOTS]; - /* per-chain, per-pilot EVM values */ - uint32_t __aln[8]; - } evm; - - /* 32 bits */ - uint8_t c_phytype; /* PHY type, FW flags above */ - uint8_t c_vhtnss; /* VHT - number of spatial streams */ - uint8_t c_pad2[2]; -}; +struct ieee80211_rx_params; +struct ieee80211_rx_stats; -struct ieee80211_rx_params { - struct ieee80211_rx_stats params; -}; int ieee80211_add_rx_params(struct mbuf *m, const struct ieee80211_rx_stats *rxs); int ieee80211_get_rx_params(struct mbuf *m, diff --git 
a/freebsd/sys/net80211/ieee80211_hostap.c b/freebsd/sys/net80211/ieee80211_hostap.c index 8905c53c..6009ead7 100644 --- a/freebsd/sys/net80211/ieee80211_hostap.c +++ b/freebsd/sys/net80211/ieee80211_hostap.c @@ -64,6 +64,7 @@ __FBSDID("$FreeBSD$"); #include #endif #include +#include #define IEEE80211_RATE2MBS(r) (((r) & IEEE80211_RATE_VAL) / 2) @@ -1747,6 +1748,7 @@ hostap_recv_mgmt(struct ieee80211_node *ni, struct mbuf *m0, struct ieee80211_frame *wh; uint8_t *frm, *efrm, *sfrm; uint8_t *ssid, *rates, *xrates, *wpa, *rsn, *wme, *ath, *htcap; + uint8_t *vhtcap, *vhtinfo; int reassoc, resp; uint8_t rate; @@ -2044,6 +2046,7 @@ hostap_recv_mgmt(struct ieee80211_node *ni, struct mbuf *m0, if (reassoc) frm += 6; /* ignore current AP info */ ssid = rates = xrates = wpa = rsn = wme = ath = htcap = NULL; + vhtcap = vhtinfo = NULL; sfrm = frm; while (efrm - frm > 1) { IEEE80211_VERIFY_LENGTH(efrm - frm, frm[1] + 2, return); @@ -2063,6 +2066,12 @@ hostap_recv_mgmt(struct ieee80211_node *ni, struct mbuf *m0, case IEEE80211_ELEMID_HTCAP: htcap = frm; break; + case IEEE80211_ELEMID_VHT_CAP: + vhtcap = frm; + break; + case IEEE80211_ELEMID_VHT_OPMODE: + vhtinfo = frm; + break; case IEEE80211_ELEMID_VENDOR: if (iswpaoui(frm)) wpa = frm; @@ -2094,6 +2103,18 @@ hostap_recv_mgmt(struct ieee80211_node *ni, struct mbuf *m0, return); /* XXX just NULL out? */ } + /* Validate VHT IEs */ + if (vhtcap != NULL) { + IEEE80211_VERIFY_LENGTH(vhtcap[1], + sizeof(struct ieee80211_ie_vhtcap) - 2, + return); + } + if (vhtinfo != NULL) { + IEEE80211_VERIFY_LENGTH(vhtinfo[1], + sizeof(struct ieee80211_ie_vht_operation) - 2, + return); + } + if ((vap->iv_flags & IEEE80211_F_WPA) && !wpa_assocreq(ni, &rsnparms, wh, wpa, rsn, capinfo)) return; @@ -2137,10 +2158,24 @@ hostap_recv_mgmt(struct ieee80211_node *ni, struct mbuf *m0, vap->iv_stats.is_rx_assoc_norate++; return; } + /* * Do HT rate set handling and setup HT node state. 
*/ ni->ni_chan = vap->iv_bss->ni_chan; + + /* VHT */ + if (IEEE80211_IS_CHAN_VHT(ni->ni_chan) && + vhtcap != NULL && + vhtinfo != NULL) { + /* XXX TODO; see below */ + printf("%s: VHT TODO!\n", __func__); + ieee80211_vht_node_init(ni); + ieee80211_vht_update_cap(ni, vhtcap, vhtinfo); + } else if (ni->ni_flags & IEEE80211_NODE_VHT) + ieee80211_vht_node_cleanup(ni); + + /* HT */ if (IEEE80211_IS_CHAN_HT(ni->ni_chan) && htcap != NULL) { rate = ieee80211_setup_htrates(ni, htcap, IEEE80211_F_DOFMCS | IEEE80211_F_DONEGO | @@ -2155,6 +2190,12 @@ hostap_recv_mgmt(struct ieee80211_node *ni, struct mbuf *m0, ieee80211_ht_updatehtcap(ni, htcap); } else if (ni->ni_flags & IEEE80211_NODE_HT) ieee80211_ht_node_cleanup(ni); + + /* Finally - this will use HT/VHT info to change node channel */ + if (IEEE80211_IS_CHAN_HT(ni->ni_chan) && htcap != NULL) { + ieee80211_ht_updatehtcap_final(ni); + } + #ifdef IEEE80211_SUPPORT_SUPERG /* Always do ff node cleanup; for A-MSDU */ ieee80211_ff_node_cleanup(ni); diff --git a/freebsd/sys/net80211/ieee80211_ht.c b/freebsd/sys/net80211/ieee80211_ht.c index beeddda4..28093ed1 100644 --- a/freebsd/sys/net80211/ieee80211_ht.c +++ b/freebsd/sys/net80211/ieee80211_ht.c @@ -300,6 +300,11 @@ ieee80211_ht_vattach(struct ieee80211vap *vap) vap->iv_flags_ht |= IEEE80211_FHT_STBC_TX; if (vap->iv_htcaps & IEEE80211_HTCAP_RXSTBC) vap->iv_flags_ht |= IEEE80211_FHT_STBC_RX; + + if (vap->iv_htcaps & IEEE80211_HTCAP_LDPC) + vap->iv_flags_ht |= IEEE80211_FHT_LDPC_RX; + if (vap->iv_htcaps & IEEE80211_HTC_TXLDPC) + vap->iv_flags_ht |= IEEE80211_FHT_LDPC_TX; } /* NB: disable default legacy WDS, too many issues right now */ if (vap->iv_flags_ext & IEEE80211_FEXT_WDSLEGACY) @@ -418,19 +423,17 @@ ieee80211_ht_announce(struct ieee80211com *ic) ht_announce(ic, IEEE80211_MODE_11NG); } -static struct ieee80211_htrateset htrateset; - -const struct ieee80211_htrateset * -ieee80211_get_suphtrates(struct ieee80211com *ic, - const struct ieee80211_channel *c) +void 
+ieee80211_init_suphtrates(struct ieee80211com *ic) { #define ADDRATE(x) do { \ - htrateset.rs_rates[htrateset.rs_nrates] = x; \ - htrateset.rs_nrates++; \ + htrateset->rs_rates[htrateset->rs_nrates] = x; \ + htrateset->rs_nrates++; \ } while (0) + struct ieee80211_htrateset *htrateset = &ic->ic_sup_htrates; int i; - memset(&htrateset, 0, sizeof(struct ieee80211_htrateset)); + memset(htrateset, 0, sizeof(struct ieee80211_htrateset)); for (i = 0; i < ic->ic_txstream * 8; i++) ADDRATE(i); if ((ic->ic_htcaps & IEEE80211_HTCAP_CHWIDTH40) && @@ -450,7 +453,6 @@ ieee80211_get_suphtrates(struct ieee80211com *ic, ADDRATE(i); } } - return &htrateset; #undef ADDRATE } @@ -644,6 +646,40 @@ ampdu_dispatch(struct ieee80211_node *ni, struct mbuf *m) (void) ieee80211_input(ni, m, 0, 0); } +static void +ampdu_rx_moveup(struct ieee80211_rx_ampdu *rap, struct ieee80211_node *ni, + int i, int winstart) +{ + struct ieee80211vap *vap = ni->ni_vap; + + if (rap->rxa_qframes != 0) { + int n = rap->rxa_qframes, j; + + if (winstart != -1) { + /* + * NB: in window-sliding mode, loop assumes i > 0 + * and/or rxa_m[0] is NULL + */ + KASSERT(rap->rxa_m[0] == NULL, + ("%s: BA window slot 0 occupied", __func__)); + } + for (j = i+1; j < rap->rxa_wnd; j++) { + if (rap->rxa_m[j] != NULL) { + rap->rxa_m[j-i] = rap->rxa_m[j]; + rap->rxa_m[j] = NULL; + if (--n == 0) + break; + } + } + KASSERT(n == 0, ("%s: lost %d frames, qframes %d off %d " + "BA win <%d:%d> winstart %d", + __func__, n, rap->rxa_qframes, i, rap->rxa_start, + IEEE80211_SEQ_ADD(rap->rxa_start, rap->rxa_wnd-1), + winstart)); + vap->iv_stats.is_ampdu_rx_copy += rap->rxa_qframes; + } +} + /* * Dispatch as many frames as possible from the re-order queue. * Frames will always be "at the front"; we process all frames @@ -674,19 +710,8 @@ ampdu_rx_dispatch(struct ieee80211_rx_ampdu *rap, struct ieee80211_node *ni) * If frames remain, copy the mbuf pointers down so * they correspond to the offsets in the new window. 
*/ - if (rap->rxa_qframes != 0) { - int n = rap->rxa_qframes, j; - for (j = i+1; j < rap->rxa_wnd; j++) { - if (rap->rxa_m[j] != NULL) { - rap->rxa_m[j-i] = rap->rxa_m[j]; - rap->rxa_m[j] = NULL; - if (--n == 0) - break; - } - } - KASSERT(n == 0, ("lost %d frames", n)); - vap->iv_stats.is_ampdu_rx_copy += rap->rxa_qframes; - } + ampdu_rx_moveup(rap, ni, i, -1); + /* * Adjust the start of the BA window to * reflect the frames just dispatched. @@ -761,27 +786,8 @@ ampdu_rx_flush_upto(struct ieee80211_node *ni, * If frames remain, copy the mbuf pointers down so * they correspond to the offsets in the new window. */ - if (rap->rxa_qframes != 0) { - int n = rap->rxa_qframes, j; + ampdu_rx_moveup(rap, ni, i, winstart); - /* NB: this loop assumes i > 0 and/or rxa_m[0] is NULL */ - KASSERT(rap->rxa_m[0] == NULL, - ("%s: BA window slot 0 occupied", __func__)); - for (j = i+1; j < rap->rxa_wnd; j++) { - if (rap->rxa_m[j] != NULL) { - rap->rxa_m[j-i] = rap->rxa_m[j]; - rap->rxa_m[j] = NULL; - if (--n == 0) - break; - } - } - KASSERT(n == 0, ("%s: lost %d frames, qframes %d off %d " - "BA win <%d:%d> winstart %d", - __func__, n, rap->rxa_qframes, i, rap->rxa_start, - IEEE80211_SEQ_ADD(rap->rxa_start, rap->rxa_wnd-1), - winstart)); - vap->iv_stats.is_ampdu_rx_copy += rap->rxa_qframes; - } /* * Move the start of the BA window; we use the * sequence number of the last MSDU that was @@ -824,6 +830,16 @@ ieee80211_ampdu_reorder(struct ieee80211_node *ni, struct mbuf *m) */ return PROCESS; } + + /* + * 802.11-2012 9.3.2.10 - Duplicate detection and recovery. + * + * Multicast QoS data frames are checked against a different + * counter, not the per-TID counter. + */ + if (IEEE80211_IS_MULTICAST(wh->i_addr1)) + return PROCESS; + if (IEEE80211_IS_DSTODS(wh)) tid = ((struct ieee80211_qosframe_addr4 *)wh)->i_qos[0]; else @@ -1492,52 +1508,117 @@ ieee80211_parse_htinfo(struct ieee80211_node *ni, const uint8_t *ie) } /* - * Handle 11n channel switch. 
Use the received HT ie's to - * identify the right channel to use. If we cannot locate it - * in the channel table then fallback to legacy operation. + * Handle 11n/11ac channel switch. + * + * Use the received HT/VHT ie's to identify the right channel to use. + * If we cannot locate it in the channel table then fallback to + * legacy operation. + * * Note that we use this information to identify the node's * channel only; the caller is responsible for insuring any * required channel change is done (e.g. in sta mode when * parsing the contents of a beacon frame). */ static int -htinfo_update_chw(struct ieee80211_node *ni, int htflags) +htinfo_update_chw(struct ieee80211_node *ni, int htflags, int vhtflags) { struct ieee80211com *ic = ni->ni_ic; struct ieee80211_channel *c; int chanflags; int ret = 0; - chanflags = (ni->ni_chan->ic_flags &~ IEEE80211_CHAN_HT) | htflags; - if (chanflags != ni->ni_chan->ic_flags) { - /* XXX not right for ht40- */ - c = ieee80211_find_channel(ic, ni->ni_chan->ic_freq, chanflags); - if (c == NULL && (htflags & IEEE80211_CHAN_HT40)) { - /* - * No HT40 channel entry in our table; fall back - * to HT20 operation. This should not happen. - */ - c = findhtchan(ic, ni->ni_chan, IEEE80211_CHAN_HT20); + /* + * First step - do HT/VHT only channel lookup based on operating mode + * flags. This involves masking out the VHT flags as well. + * Otherwise we end up doing the full channel walk each time + * we trigger this, which is expensive. + */ + chanflags = (ni->ni_chan->ic_flags &~ + (IEEE80211_CHAN_HT | IEEE80211_CHAN_VHT)) | htflags | vhtflags; + + if (chanflags == ni->ni_chan->ic_flags) + goto done; + + /* + * If HT /or/ VHT flags have changed then check both. + * We need to start by picking a HT channel anyway. 
+ */ + + c = NULL; + chanflags = (ni->ni_chan->ic_flags &~ + (IEEE80211_CHAN_HT | IEEE80211_CHAN_VHT)) | htflags; + /* XXX not right for ht40- */ + c = ieee80211_find_channel(ic, ni->ni_chan->ic_freq, chanflags); + if (c == NULL && (htflags & IEEE80211_CHAN_HT40)) { + /* + * No HT40 channel entry in our table; fall back + * to HT20 operation. This should not happen. + */ + c = findhtchan(ic, ni->ni_chan, IEEE80211_CHAN_HT20); #if 0 - IEEE80211_NOTE(ni->ni_vap, - IEEE80211_MSG_ASSOC | IEEE80211_MSG_11N, ni, - "no HT40 channel (freq %u), falling back to HT20", - ni->ni_chan->ic_freq); + IEEE80211_NOTE(ni->ni_vap, + IEEE80211_MSG_ASSOC | IEEE80211_MSG_11N, ni, + "no HT40 channel (freq %u), falling back to HT20", + ni->ni_chan->ic_freq); #endif - /* XXX stat */ - } - if (c != NULL && c != ni->ni_chan) { - IEEE80211_NOTE(ni->ni_vap, - IEEE80211_MSG_ASSOC | IEEE80211_MSG_11N, ni, - "switch station to HT%d channel %u/0x%x", - IEEE80211_IS_CHAN_HT40(c) ? 40 : 20, - c->ic_freq, c->ic_flags); - ni->ni_chan = c; - ret = 1; - } - /* NB: caller responsible for forcing any channel change */ + /* XXX stat */ + } + + /* Nothing found - leave it alone; move onto VHT */ + if (c == NULL) + c = ni->ni_chan; + + /* + * If it's non-HT, then bail out now. + */ + if (! IEEE80211_IS_CHAN_HT(c)) { + IEEE80211_NOTE(ni->ni_vap, + IEEE80211_MSG_ASSOC | IEEE80211_MSG_11N, ni, + "not HT; skipping VHT check (%u/0x%x)", + c->ic_freq, c->ic_flags); + goto done; + } + + /* + * Next step - look at the current VHT flags and determine + * if we need to upgrade. Mask out the VHT and HT flags since + * the vhtflags field will already have the correct HT + * flags to use. 
+ */ + if (IEEE80211_CONF_VHT(ic) && ni->ni_vhtcap != 0 && vhtflags != 0) { + chanflags = (c->ic_flags + &~ (IEEE80211_CHAN_HT | IEEE80211_CHAN_VHT)) + | vhtflags; + IEEE80211_NOTE(ni->ni_vap, + IEEE80211_MSG_ASSOC | IEEE80211_MSG_11N, + ni, + "%s: VHT; chanwidth=0x%02x; vhtflags=0x%08x", + __func__, ni->ni_vht_chanwidth, vhtflags); + + IEEE80211_NOTE(ni->ni_vap, + IEEE80211_MSG_ASSOC | IEEE80211_MSG_11N, + ni, + "%s: VHT; trying lookup for %d/0x%08x", + __func__, c->ic_freq, chanflags); + c = ieee80211_find_channel(ic, c->ic_freq, chanflags); + } + + /* Finally, if it's changed */ + if (c != NULL && c != ni->ni_chan) { + IEEE80211_NOTE(ni->ni_vap, + IEEE80211_MSG_ASSOC | IEEE80211_MSG_11N, ni, + "switch station to %s%d channel %u/0x%x", + IEEE80211_IS_CHAN_VHT(c) ? "VHT" : "HT", + IEEE80211_IS_CHAN_VHT80(c) ? 80 : + (IEEE80211_IS_CHAN_HT40(c) ? 40 : 20), + c->ic_freq, c->ic_flags); + ni->ni_chan = c; + ret = 1; } - /* update node's tx channel width */ + /* NB: caller responsible for forcing any channel change */ + +done: + /* update node's (11n) tx channel width */ ni->ni_chw = IEEE80211_IS_CHAN_HT40(ni->ni_chan)? 40 : 20; return (ret); } @@ -1586,31 +1667,156 @@ htcap_update_shortgi(struct ieee80211_node *ni) ni->ni_flags |= IEEE80211_NODE_SGI40; } +/* + * Update LDPC state according to received htcap + * and local settings. + */ +static __inline void +htcap_update_ldpc(struct ieee80211_node *ni) +{ + struct ieee80211vap *vap = ni->ni_vap; + + if ((ni->ni_htcap & IEEE80211_HTCAP_LDPC) && + (vap->iv_flags_ht & IEEE80211_FHT_LDPC_TX)) + ni->ni_flags |= IEEE80211_NODE_LDPC; +} + /* * Parse and update HT-related state extracted from * the HT cap and info ie's. + * + * This is called from the STA management path and + * the ieee80211_node_join() path. It will take into + * account the IEs discovered during scanning and + * adjust things accordingly. 
*/ -int +void ieee80211_ht_updateparams(struct ieee80211_node *ni, const uint8_t *htcapie, const uint8_t *htinfoie) { struct ieee80211vap *vap = ni->ni_vap; const struct ieee80211_ie_htinfo *htinfo; - int htflags; - int ret = 0; ieee80211_parse_htcap(ni, htcapie); if (vap->iv_htcaps & IEEE80211_HTCAP_SMPS) htcap_update_mimo_ps(ni); htcap_update_shortgi(ni); + htcap_update_ldpc(ni); if (htinfoie[0] == IEEE80211_ELEMID_VENDOR) htinfoie += 4; htinfo = (const struct ieee80211_ie_htinfo *) htinfoie; htinfo_parse(ni, htinfo); + /* + * Defer the node channel change; we need to now + * update VHT parameters before we do it. + */ + + if ((htinfo->hi_byte1 & IEEE80211_HTINFO_RIFSMODE_PERM) && + (vap->iv_flags_ht & IEEE80211_FHT_RIFS)) + ni->ni_flags |= IEEE80211_NODE_RIFS; + else + ni->ni_flags &= ~IEEE80211_NODE_RIFS; +} + +static uint32_t +ieee80211_vht_get_vhtflags(struct ieee80211_node *ni, uint32_t htflags) +{ + struct ieee80211vap *vap = ni->ni_vap; + uint32_t vhtflags = 0; + + vhtflags = 0; + if (ni->ni_flags & IEEE80211_NODE_VHT && vap->iv_flags_vht & IEEE80211_FVHT_VHT) { + if ((ni->ni_vht_chanwidth == IEEE80211_VHT_CHANWIDTH_160MHZ) && + /* XXX 2 means "160MHz and 80+80MHz", 1 means "160MHz" */ + (MS(vap->iv_vhtcaps, + IEEE80211_VHTCAP_SUPP_CHAN_WIDTH_MASK) >= 1) && + (vap->iv_flags_vht & IEEE80211_FVHT_USEVHT160)) { + vhtflags = IEEE80211_CHAN_VHT160; + /* Mirror the HT40 flags */ + if (htflags == IEEE80211_CHAN_HT40U) { + vhtflags |= IEEE80211_CHAN_HT40U; + } else if (htflags == IEEE80211_CHAN_HT40D) { + vhtflags |= IEEE80211_CHAN_HT40D; + } + } else if ((ni->ni_vht_chanwidth == IEEE80211_VHT_CHANWIDTH_80P80MHZ) && + /* XXX 2 means "160MHz and 80+80MHz" */ + (MS(vap->iv_vhtcaps, + IEEE80211_VHTCAP_SUPP_CHAN_WIDTH_MASK) == 2) && + (vap->iv_flags_vht & IEEE80211_FVHT_USEVHT80P80)) { + vhtflags = IEEE80211_CHAN_VHT80_80; + /* Mirror the HT40 flags */ + if (htflags == IEEE80211_CHAN_HT40U) { + vhtflags |= IEEE80211_CHAN_HT40U; + } else if (htflags == 
IEEE80211_CHAN_HT40D) { + vhtflags |= IEEE80211_CHAN_HT40D; + } + } else if ((ni->ni_vht_chanwidth == IEEE80211_VHT_CHANWIDTH_80MHZ) && + (vap->iv_flags_vht & IEEE80211_FVHT_USEVHT80)) { + vhtflags = IEEE80211_CHAN_VHT80; + /* Mirror the HT40 flags */ + if (htflags == IEEE80211_CHAN_HT40U) { + vhtflags |= IEEE80211_CHAN_HT40U; + } else if (htflags == IEEE80211_CHAN_HT40D) { + vhtflags |= IEEE80211_CHAN_HT40D; + } + } else if (ni->ni_vht_chanwidth == IEEE80211_VHT_CHANWIDTH_USE_HT) { + /* Mirror the HT40 flags */ + /* + * XXX TODO: if ht40 is disabled, but vht40 isn't + * disabled then this logic will get very, very sad. + * It's quite possible the only sane thing to do is + * to not have vht40 as an option, and just obey + * 'ht40' as that flag. + */ + if ((htflags == IEEE80211_CHAN_HT40U) && + (vap->iv_flags_vht & IEEE80211_FVHT_USEVHT40)) { + vhtflags = IEEE80211_CHAN_VHT40U + | IEEE80211_CHAN_HT40U; + } else if (htflags == IEEE80211_CHAN_HT40D && + (vap->iv_flags_vht & IEEE80211_FVHT_USEVHT40)) { + vhtflags = IEEE80211_CHAN_VHT40D + | IEEE80211_CHAN_HT40D; + } else if (htflags == IEEE80211_CHAN_HT20) { + vhtflags = IEEE80211_CHAN_VHT20 + | IEEE80211_CHAN_HT20; + } + } else { + vhtflags = IEEE80211_CHAN_VHT20; + } + } + return (vhtflags); +} + +/* + * Final part of updating the HT parameters. + * + * This is called from the STA management path and + * the ieee80211_node_join() path. It will take into + * account the IEs discovered during scanning and + * adjust things accordingly. + * + * This is done after a call to ieee80211_ht_updateparams() + * because it (and the upcoming VHT version of updateparams) + * needs to ensure everything is parsed before htinfo_update_chw() + * is called - which will change the channel config for the + * node for us. 
+ */ +int +ieee80211_ht_updateparams_final(struct ieee80211_node *ni, + const uint8_t *htcapie, const uint8_t *htinfoie) +{ + struct ieee80211vap *vap = ni->ni_vap; + const struct ieee80211_ie_htinfo *htinfo; + int htflags, vhtflags; + int ret = 0; + + htinfo = (const struct ieee80211_ie_htinfo *) htinfoie; + htflags = (vap->iv_flags_ht & IEEE80211_FHT_HT) ? IEEE80211_CHAN_HT20 : 0; + /* NB: honor operating mode constraint */ if ((htinfo->hi_byte1 & IEEE80211_HTINFO_TXWIDTH_2040) && (vap->iv_flags_ht & IEEE80211_FHT_USEHT40)) { @@ -1619,14 +1825,16 @@ ieee80211_ht_updateparams(struct ieee80211_node *ni, else if (ni->ni_ht2ndchan == IEEE80211_HTINFO_2NDCHAN_BELOW) htflags = IEEE80211_CHAN_HT40D; } - if (htinfo_update_chw(ni, htflags)) - ret = 1; - if ((htinfo->hi_byte1 & IEEE80211_HTINFO_RIFSMODE_PERM) && - (vap->iv_flags_ht & IEEE80211_FHT_RIFS)) - ni->ni_flags |= IEEE80211_NODE_RIFS; - else - ni->ni_flags &= ~IEEE80211_NODE_RIFS; + /* + * VHT flags - do much the same; check whether VHT is available + * and if so, what our ideal channel use would be based on our + * capabilities and the (pre-parsed) VHT info IE. + */ + vhtflags = ieee80211_vht_get_vhtflags(ni, htflags); + + if (htinfo_update_chw(ni, htflags, vhtflags)) + ret = 1; return (ret); } @@ -1634,17 +1842,32 @@ ieee80211_ht_updateparams(struct ieee80211_node *ni, /* * Parse and update HT-related state extracted from the HT cap ie * for a station joining an HT BSS. + * + * This is called from the hostap path for each station. */ void ieee80211_ht_updatehtcap(struct ieee80211_node *ni, const uint8_t *htcapie) { struct ieee80211vap *vap = ni->ni_vap; - int htflags; ieee80211_parse_htcap(ni, htcapie); if (vap->iv_htcaps & IEEE80211_HTCAP_SMPS) htcap_update_mimo_ps(ni); htcap_update_shortgi(ni); + htcap_update_ldpc(ni); +} + +/* + * Called once HT and VHT capabilities are parsed in hostap mode - + * this will adjust the channel configuration of the given node + * based on the configuration and capabilities. 
+ */ +void +ieee80211_ht_updatehtcap_final(struct ieee80211_node *ni) +{ + struct ieee80211vap *vap = ni->ni_vap; + int htflags; + int vhtflags; /* NB: honor operating mode constraint */ /* XXX 40 MHz intolerant */ @@ -1657,7 +1880,14 @@ ieee80211_ht_updatehtcap(struct ieee80211_node *ni, const uint8_t *htcapie) else if (IEEE80211_IS_CHAN_HT40D(vap->iv_bss->ni_chan)) htflags = IEEE80211_CHAN_HT40D; } - (void) htinfo_update_chw(ni, htflags); + /* + * VHT flags - do much the same; check whether VHT is available + * and if so, what our ideal channel use would be based on our + * capabilities and the (pre-parsed) VHT info IE. + */ + vhtflags = ieee80211_vht_get_vhtflags(ni, htflags); + + (void) htinfo_update_chw(ni, htflags, vhtflags); } /* @@ -2137,6 +2367,7 @@ ht_recv_action_ht_txchwidth(struct ieee80211_node *ni, "%s: HT txchwidth, width %d%s", __func__, chw, ni->ni_chw != chw ? "*" : ""); if (chw != ni->ni_chw) { + /* XXX does this need to change the ht40 station count? */ ni->ni_chw = chw; /* XXX notify on change */ } @@ -2231,6 +2462,10 @@ ieee80211_ampdu_request(struct ieee80211_node *ni, dialogtoken = (tokens+1) % 63; /* XXX */ tid = tap->txa_tid; + + /* + * XXX TODO: This is racy with any other parallel TX going on. 
:( + */ tap->txa_start = ni->ni_txseqs[tid]; args[0] = dialogtoken; @@ -2826,7 +3061,9 @@ ieee80211_add_htcap_body(uint8_t *frm, struct ieee80211_node *ni) if ((vap->iv_flags_ht & IEEE80211_FHT_STBC_RX) == 0) caps &= ~IEEE80211_HTCAP_RXSTBC; - /* XXX TODO: adjust LDPC based on receive capabilities */ + /* adjust LDPC based on receive capabilites */ + if ((vap->iv_flags_ht & IEEE80211_FHT_LDPC_RX) == 0) + caps &= ~IEEE80211_HTCAP_LDPC; ADDSHORT(frm, caps); diff --git a/freebsd/sys/net80211/ieee80211_ht.h b/freebsd/sys/net80211/ieee80211_ht.h index dfc7d1c3..5b818a28 100644 --- a/freebsd/sys/net80211/ieee80211_ht.h +++ b/freebsd/sys/net80211/ieee80211_ht.h @@ -177,8 +177,7 @@ struct ieee80211_mcs_rates { uint16_t ht40_rate_400ns; }; extern const struct ieee80211_mcs_rates ieee80211_htrates[]; -const struct ieee80211_htrateset *ieee80211_get_suphtrates( - struct ieee80211com *, const struct ieee80211_channel *); +void ieee80211_init_suphtrates(struct ieee80211com *); struct ieee80211_node; int ieee80211_setup_htrates(struct ieee80211_node *, @@ -201,9 +200,12 @@ void ieee80211_htprot_update(struct ieee80211com *, int protmode); void ieee80211_ht_timeout(struct ieee80211com *); void ieee80211_parse_htcap(struct ieee80211_node *, const uint8_t *); void ieee80211_parse_htinfo(struct ieee80211_node *, const uint8_t *); -int ieee80211_ht_updateparams(struct ieee80211_node *, const uint8_t *, +void ieee80211_ht_updateparams(struct ieee80211_node *, const uint8_t *, const uint8_t *); +int ieee80211_ht_updateparams_final(struct ieee80211_node *, + const uint8_t *, const uint8_t *); void ieee80211_ht_updatehtcap(struct ieee80211_node *, const uint8_t *); +void ieee80211_ht_updatehtcap_final(struct ieee80211_node *); int ieee80211_ampdu_request(struct ieee80211_node *, struct ieee80211_tx_ampdu *); void ieee80211_ampdu_stop(struct ieee80211_node *, diff --git a/freebsd/sys/net80211/ieee80211_input.c b/freebsd/sys/net80211/ieee80211_input.c index 0e427f84..22d9a565 100644 --- 
a/freebsd/sys/net80211/ieee80211_input.c +++ b/freebsd/sys/net80211/ieee80211_input.c @@ -496,6 +496,8 @@ ieee80211_parse_beacon(struct ieee80211_node *ni, struct mbuf *m, scan->status = 0; /* * beacon/probe response frame format + * + * XXX Update from 802.11-2012 - eg where HT is * [8] time stamp * [2] beacon interval * [2] capability information @@ -510,6 +512,8 @@ ieee80211_parse_beacon(struct ieee80211_node *ni, struct mbuf *m, * [tlv] WPA or RSN * [tlv] HT capabilities * [tlv] HT information + * [tlv] VHT capabilities + * [tlv] VHT information * [tlv] Atheros capabilities * [tlv] Mesh ID * [tlv] Mesh Configuration @@ -587,6 +591,12 @@ ieee80211_parse_beacon(struct ieee80211_node *ni, struct mbuf *m, case IEEE80211_ELEMID_HTCAP: scan->htcap = frm; break; + case IEEE80211_ELEMID_VHT_CAP: + scan->vhtcap = frm; + break; + case IEEE80211_ELEMID_VHT_OPMODE: + scan->vhtopmode = frm; + break; case IEEE80211_ELEMID_RSN: scan->rsn = frm; break; @@ -720,6 +730,19 @@ ieee80211_parse_beacon(struct ieee80211_node *ni, struct mbuf *m, sizeof(struct ieee80211_ie_htinfo)-2, scan->htinfo = NULL); } + + /* Process VHT IEs */ + if (scan->vhtcap != NULL) { + IEEE80211_VERIFY_LENGTH(scan->vhtcap[1], + sizeof(struct ieee80211_ie_vhtcap) - 2, + scan->vhtcap = NULL); + } + if (scan->vhtopmode != NULL) { + IEEE80211_VERIFY_LENGTH(scan->vhtopmode[1], + sizeof(struct ieee80211_ie_vht_operation) - 2, + scan->vhtopmode = NULL); + } + return scan->status; } @@ -840,6 +863,9 @@ ieee80211_parse_action(struct ieee80211_node *ni, struct mbuf *m) } break; #endif + case IEEE80211_ACTION_CAT_VHT: + printf("%s: TODO: VHT handling!\n", __func__); + break; } return 0; } diff --git a/freebsd/sys/net80211/ieee80211_input.h b/freebsd/sys/net80211/ieee80211_input.h index 6fb0d707..0ae8dd08 100644 --- a/freebsd/sys/net80211/ieee80211_input.h +++ b/freebsd/sys/net80211/ieee80211_input.h @@ -149,6 +149,12 @@ ishtinfooui(const uint8_t *frm) * (as the seqnum wraps), handle that special case so packets aren't 
* incorrectly dropped - ie, if the next packet is sequence number 0 * but a retransmit since the initial packet didn't make it. + * + * XXX TODO: handle sequence number space wrapping with dropped frames; + * especially in high interference conditions under high traffic load + * The RX AMPDU reorder code also needs it. + * + * XXX TODO: update for 802.11-2012 9.3.2.10 Duplicate Detection and Recovery. */ static __inline int ieee80211_check_rxseq(struct ieee80211_node *ni, struct ieee80211_frame *wh, @@ -175,6 +181,13 @@ ieee80211_check_rxseq(struct ieee80211_node *ni, struct ieee80211_frame *wh, if (! IEEE80211_HAS_SEQ(type, subtype)) return 1; + /* + * Always allow multicast frames for now - QoS (any TID) + * or not. + */ + if (IEEE80211_IS_MULTICAST(wh->i_addr1)) + return 1; + tid = ieee80211_gettid(wh); /* diff --git a/freebsd/sys/net80211/ieee80211_ioctl.c b/freebsd/sys/net80211/ieee80211_ioctl.c index c0813a78..4b874574 100644 --- a/freebsd/sys/net80211/ieee80211_ioctl.c +++ b/freebsd/sys/net80211/ieee80211_ioctl.c @@ -1138,6 +1138,13 @@ ieee80211_ioctl_get80211(struct ieee80211vap *vap, u_long cmd, if (vap->iv_flags_ht & IEEE80211_FHT_STBC_RX) ireq->i_val |= 2; break; + case IEEE80211_IOC_LDPC: + ireq->i_val = 0; + if (vap->iv_flags_ht & IEEE80211_FHT_LDPC_TX) + ireq->i_val |= 1; + if (vap->iv_flags_ht & IEEE80211_FHT_LDPC_RX) + ireq->i_val |= 2; + break; /* VHT */ case IEEE80211_IOC_VHTCONF: @@ -2221,13 +2228,19 @@ checkrate(const struct ieee80211_rateset *rs, int rate) } static int -checkmcs(int mcs) +checkmcs(const struct ieee80211_htrateset *rs, int mcs) { + int rate_val = IEEE80211_RV(mcs); + int i; + if (mcs == IEEE80211_FIXED_RATE_NONE) return 1; if ((mcs & IEEE80211_RATE_MCS) == 0) /* MCS always have 0x80 set */ return 0; - return (mcs & 0x7f) <= 15; /* XXX could search ht rate set */ + for (i = 0; i < rs->rs_nrates; i++) + if (IEEE80211_RV(rs->rs_rates[i]) == rate_val) + return 1; + return 0; } static int @@ -2237,6 +2250,7 @@ 
ieee80211_ioctl_settxparams(struct ieee80211vap *vap, struct ieee80211com *ic = vap->iv_ic; struct ieee80211_txparams_req parms; /* XXX stack use? */ struct ieee80211_txparam *src, *dst; + const struct ieee80211_htrateset *rs_ht; const struct ieee80211_rateset *rs; int error, mode, changed, is11n, nmodes; @@ -2255,23 +2269,24 @@ ieee80211_ioctl_settxparams(struct ieee80211vap *vap, src = &parms.params[mode]; dst = &vap->iv_txparms[mode]; rs = &ic->ic_sup_rates[mode]; /* NB: 11n maps to legacy */ + rs_ht = &ic->ic_sup_htrates; is11n = (mode == IEEE80211_MODE_11NA || mode == IEEE80211_MODE_11NG); if (src->ucastrate != dst->ucastrate) { if (!checkrate(rs, src->ucastrate) && - (!is11n || !checkmcs(src->ucastrate))) + (!is11n || !checkmcs(rs_ht, src->ucastrate))) return EINVAL; changed++; } if (src->mcastrate != dst->mcastrate) { if (!checkrate(rs, src->mcastrate) && - (!is11n || !checkmcs(src->mcastrate))) + (!is11n || !checkmcs(rs_ht, src->mcastrate))) return EINVAL; changed++; } if (src->mgmtrate != dst->mgmtrate) { if (!checkrate(rs, src->mgmtrate) && - (!is11n || !checkmcs(src->mgmtrate))) + (!is11n || !checkmcs(rs_ht, src->mgmtrate))) return EINVAL; changed++; } @@ -3374,6 +3389,31 @@ ieee80211_ioctl_set80211(struct ieee80211vap *vap, u_long cmd, struct ieee80211r if (isvapht(vap)) error = ERESTART; break; + case IEEE80211_IOC_LDPC: + /* Check if we can do LDPC TX/RX before changing the setting */ + if ((ireq->i_val & 1) && + (vap->iv_htcaps & IEEE80211_HTC_TXLDPC) == 0) + return EOPNOTSUPP; + if ((ireq->i_val & 2) && + (vap->iv_htcaps & IEEE80211_HTCAP_LDPC) == 0) + return EOPNOTSUPP; + + /* TX */ + if (ireq->i_val & 1) + vap->iv_flags_ht |= IEEE80211_FHT_LDPC_TX; + else + vap->iv_flags_ht &= ~IEEE80211_FHT_LDPC_TX; + + /* RX */ + if (ireq->i_val & 2) + vap->iv_flags_ht |= IEEE80211_FHT_LDPC_RX; + else + vap->iv_flags_ht &= ~IEEE80211_FHT_LDPC_RX; + + /* NB: reset only if we're operating on an 11n channel */ + if (isvapht(vap)) + error = ERESTART; + break; /* VHT 
*/ case IEEE80211_IOC_VHTCONF: diff --git a/freebsd/sys/net80211/ieee80211_node.c b/freebsd/sys/net80211/ieee80211_node.c index c9c6df96..b15a782f 100644 --- a/freebsd/sys/net80211/ieee80211_node.c +++ b/freebsd/sys/net80211/ieee80211_node.c @@ -55,6 +55,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include @@ -350,7 +351,6 @@ ieee80211_create_ibss(struct ieee80211vap* vap, struct ieee80211_channel *chan) ni->ni_fhindex = 1; } if (vap->iv_opmode == IEEE80211_M_IBSS) { - vap->iv_flags |= IEEE80211_F_SIBSS; ni->ni_capinfo |= IEEE80211_CAPINFO_IBSS; /* XXX */ if (vap->iv_flags & IEEE80211_F_DESBSSID) IEEE80211_ADDR_COPY(ni->ni_bssid, vap->iv_des_bssid); @@ -414,7 +414,11 @@ ieee80211_create_ibss(struct ieee80211vap* vap, struct ieee80211_channel *chan) /* XXX TODO: other bits and pieces - eg fast-frames? */ /* If we're an 11n channel then initialise the 11n bits */ - if (IEEE80211_IS_CHAN_HT(ni->ni_chan)) { + if (IEEE80211_IS_CHAN_VHT(ni->ni_chan)) { + /* XXX what else? */ + ieee80211_ht_node_init(ni); + ieee80211_vht_node_init(ni); + } else if (IEEE80211_IS_CHAN_HT(ni->ni_chan)) { /* XXX what else? */ ieee80211_ht_node_init(ni); } @@ -709,10 +713,43 @@ gethtadjustflags(struct ieee80211com *ic) return flags; } +/* + * Calculate VHT channel promotion flags for all vaps. + * This assumes ni_chan have been setup for each vap. + */ +static int +getvhtadjustflags(struct ieee80211com *ic) +{ + struct ieee80211vap *vap; + int flags; + + flags = 0; + /* XXX locking */ + TAILQ_FOREACH(vap, &ic->ic_vaps, iv_next) { + if (vap->iv_state < IEEE80211_S_RUN) + continue; + switch (vap->iv_opmode) { + case IEEE80211_M_WDS: + case IEEE80211_M_STA: + case IEEE80211_M_AHDEMO: + case IEEE80211_M_HOSTAP: + case IEEE80211_M_IBSS: + case IEEE80211_M_MBSS: + flags |= ieee80211_vhtchanflags(vap->iv_bss->ni_chan); + break; + default: + break; + } + } + return flags; +} + /* * Check if the current channel needs to change based on whether * any vap's are using HT20/HT40. 
This is used to sync the state * of ic_curchan after a channel width change on a running vap. + * + * Same applies for VHT. */ void ieee80211_sync_curchan(struct ieee80211com *ic) @@ -720,6 +757,8 @@ ieee80211_sync_curchan(struct ieee80211com *ic) struct ieee80211_channel *c; c = ieee80211_ht_adjust_channel(ic, ic->ic_curchan, gethtadjustflags(ic)); + c = ieee80211_vht_adjust_channel(ic, c, getvhtadjustflags(ic)); + if (c != ic->ic_curchan) { ic->ic_curchan = c; ic->ic_curmode = ieee80211_chan2mode(ic->ic_curchan); @@ -745,10 +784,23 @@ ieee80211_setupcurchan(struct ieee80211com *ic, struct ieee80211_channel *c) * set of running vap's. This assumes we are called * after ni_chan is setup for each vap. */ + /* XXX VHT? */ /* NB: this assumes IEEE80211_FHT_USEHT40 > IEEE80211_FHT_HT */ if (flags > ieee80211_htchanflags(c)) c = ieee80211_ht_adjust_channel(ic, c, flags); } + + /* + * VHT promotion - this will at least promote to VHT20/40 + * based on what HT has done; it may further promote the + * channel to VHT80 or above. 
+ */ + if (ic->ic_vhtcaps != 0) { + int flags = getvhtadjustflags(ic); + if (flags > ieee80211_vhtchanflags(c)) + c = ieee80211_vht_adjust_channel(ic, c, flags); + } + ic->ic_bsschan = ic->ic_curchan = c; ic->ic_curmode = ieee80211_chan2mode(ic->ic_curchan); ic->ic_rt = ieee80211_get_ratetable(ic->ic_curchan); @@ -851,6 +903,7 @@ ieee80211_sta_join(struct ieee80211vap *vap, struct ieee80211_channel *chan, { struct ieee80211com *ic = vap->iv_ic; struct ieee80211_node *ni; + int do_ht = 0; ni = ieee80211_alloc_node(&ic->ic_sta, vap, se->se_macaddr); if (ni == NULL) { @@ -898,9 +951,13 @@ ieee80211_sta_join(struct ieee80211vap *vap, struct ieee80211_channel *chan, if (ni->ni_ies.tdma_ie != NULL) ieee80211_parse_tdma(ni, ni->ni_ies.tdma_ie); #endif + if (ni->ni_ies.vhtcap_ie != NULL) + ieee80211_parse_vhtcap(ni, ni->ni_ies.vhtcap_ie); + if (ni->ni_ies.vhtopmode_ie != NULL) + ieee80211_parse_vhtopmode(ni, ni->ni_ies.vhtopmode_ie); - /* XXX parse VHT IEs */ /* XXX parse BSSLOAD IE */ + /* XXX parse TXPWRENV IE */ /* XXX parse APCHANREP IE */ } @@ -928,10 +985,43 @@ ieee80211_sta_join(struct ieee80211vap *vap, struct ieee80211_channel *chan, ieee80211_ht_updateparams(ni, ni->ni_ies.htcap_ie, ni->ni_ies.htinfo_ie); + do_ht = 1; + } + + /* + * Setup VHT state for this node if it's available. + * Same as the above. + * + * For now, don't allow 2GHz VHT operation. 
+ */ + if (ni->ni_ies.vhtopmode_ie != NULL && + ni->ni_ies.vhtcap_ie != NULL && + vap->iv_flags_vht & IEEE80211_FVHT_VHT) { + if (IEEE80211_IS_CHAN_2GHZ(ni->ni_chan)) { + printf("%s: BSS %6D: 2GHz channel, VHT info; ignoring\n", + __func__, + ni->ni_macaddr, + ":"); + } else { + ieee80211_vht_node_init(ni); + ieee80211_vht_updateparams(ni, + ni->ni_ies.vhtcap_ie, + ni->ni_ies.vhtopmode_ie); + ieee80211_setup_vht_rates(ni, ni->ni_ies.vhtcap_ie, + ni->ni_ies.vhtopmode_ie); + do_ht = 1; + } + } + + /* Finally do the node channel change */ + if (do_ht) { + ieee80211_ht_updateparams_final(ni, ni->ni_ies.htcap_ie, + ni->ni_ies.htinfo_ie); ieee80211_setup_htrates(ni, ni->ni_ies.htcap_ie, IEEE80211_F_JOIN | IEEE80211_F_DOBRS); ieee80211_setup_basic_htrates(ni, ni->ni_ies.htinfo_ie); } + /* XXX else check for ath FF? */ /* XXX QoS? Difficult given that WME config is specific to a master */ @@ -1104,8 +1194,10 @@ node_cleanup(struct ieee80211_node *ni) "power save mode off, %u sta's in ps mode", vap->iv_ps_sta); } /* - * Cleanup any HT-related state. + * Cleanup any VHT and HT-related state. */ + if (ni->ni_flags & IEEE80211_NODE_VHT) + ieee80211_vht_node_cleanup(ni); if (ni->ni_flags & IEEE80211_NODE_HT) ieee80211_ht_node_cleanup(ni); #ifdef IEEE80211_SUPPORT_SUPERG @@ -1228,15 +1320,16 @@ node_getmimoinfo(const struct ieee80211_node *ni, bzero(info, sizeof(*info)); - for (i = 0; i < ni->ni_mimo_chains; i++) { + for (i = 0; i < MIN(IEEE80211_MAX_CHAINS, ni->ni_mimo_chains); i++) { + /* Note: for now, just pri20 channel info */ avgrssi = ni->ni_mimo_rssi_ctl[i]; if (avgrssi == IEEE80211_RSSI_DUMMY_MARKER) { - info->rssi[i] = 0; + info->ch[i].rssi[0] = 0; } else { rssi = IEEE80211_RSSI_GET(avgrssi); - info->rssi[i] = rssi < 0 ? 0 : rssi > 127 ? 127 : rssi; + info->ch[i].rssi[0] = rssi < 0 ? 0 : rssi > 127 ? 127 : rssi; } - info->noise[i] = ni->ni_mimo_noise_ctl[i]; + info->ch[i].noise[0] = ni->ni_mimo_noise_ctl[i]; } /* XXX ext radios? 
*/ @@ -1425,6 +1518,7 @@ ieee80211_node_create_wds(struct ieee80211vap *vap, if (vap->iv_flags & IEEE80211_F_FF) ni->ni_flags |= IEEE80211_NODE_FF; #endif + /* XXX VHT */ if ((ic->ic_htcaps & IEEE80211_HTC_HT) && (vap->iv_flags_ht & IEEE80211_FHT_HT)) { /* @@ -1433,6 +1527,9 @@ ieee80211_node_create_wds(struct ieee80211vap *vap, * ni_chan will be adjusted to an HT channel. */ ieee80211_ht_wds_init(ni); + if (vap->iv_flags_vht & IEEE80211_FVHT_VHT) { + printf("%s: TODO: vht_wds_init\n", __func__); + } } else { struct ieee80211_channel *c = ni->ni_chan; /* @@ -1640,7 +1737,7 @@ ieee80211_init_neighbor(struct ieee80211_node *ni, const struct ieee80211_frame *wh, const struct ieee80211_scanparams *sp) { - int do_ht_setup = 0; + int do_ht_setup = 0, do_vht_setup = 0; ni->ni_esslen = sp->ssid[1]; memcpy(ni->ni_essid, sp->ssid + 2, sp->ssid[1]); @@ -1672,11 +1769,23 @@ ieee80211_init_neighbor(struct ieee80211_node *ni, if (ni->ni_ies.htinfo_ie != NULL) ieee80211_parse_htinfo(ni, ni->ni_ies.htinfo_ie); + if (ni->ni_ies.vhtcap_ie != NULL) + ieee80211_parse_vhtcap(ni, ni->ni_ies.vhtcap_ie); + if (ni->ni_ies.vhtopmode_ie != NULL) + ieee80211_parse_vhtopmode(ni, ni->ni_ies.vhtopmode_ie); + if ((ni->ni_ies.htcap_ie != NULL) && (ni->ni_ies.htinfo_ie != NULL) && (ni->ni_vap->iv_flags_ht & IEEE80211_FHT_HT)) { do_ht_setup = 1; } + + if ((ni->ni_ies.vhtcap_ie != NULL) && + (ni->ni_ies.vhtopmode_ie != NULL) && + (ni->ni_vap->iv_flags_vht & IEEE80211_FVHT_VHT)) { + do_vht_setup = 1; + } + } /* NB: must be after ni_chan is setup */ @@ -1694,15 +1803,40 @@ ieee80211_init_neighbor(struct ieee80211_node *ni, ieee80211_ht_updateparams(ni, ni->ni_ies.htcap_ie, ni->ni_ies.htinfo_ie); + + if (do_vht_setup) { + if (IEEE80211_IS_CHAN_2GHZ(ni->ni_chan)) { + printf("%s: BSS %6D: 2GHz channel, VHT info; ignoring\n", + __func__, + ni->ni_macaddr, + ":"); + } else { + ieee80211_vht_node_init(ni); + ieee80211_vht_updateparams(ni, + ni->ni_ies.vhtcap_ie, + ni->ni_ies.vhtopmode_ie); + 
ieee80211_setup_vht_rates(ni, + ni->ni_ies.vhtcap_ie, + ni->ni_ies.vhtopmode_ie); + } + } + + /* + * Finally do the channel upgrade/change based + * on the HT/VHT configuration. + */ + ieee80211_ht_updateparams_final(ni, ni->ni_ies.htcap_ie, + ni->ni_ies.htinfo_ie); ieee80211_setup_htrates(ni, ni->ni_ies.htcap_ie, IEEE80211_F_JOIN | IEEE80211_F_DOBRS); ieee80211_setup_basic_htrates(ni, ni->ni_ies.htinfo_ie); + ieee80211_node_setuptxparms(ni); ieee80211_ratectl_node_init(ni); - /* Reassociate; we're now 11n */ + /* Reassociate; we're now 11n/11ac */ /* * XXX TODO: this is the wrong thing to do - * we're calling it with isnew=1 so the ath(4) @@ -2367,6 +2501,7 @@ ieee80211_node_timeout(void *arg) IEEE80211_LOCK(ic); ieee80211_erp_timeout(ic); ieee80211_ht_timeout(ic); + ieee80211_vht_timeout(ic); IEEE80211_UNLOCK(ic); } callout_reset(&ic->ic_inact, IEEE80211_INACT_WAIT*hz, @@ -2464,8 +2599,12 @@ ieee80211_dump_node(struct ieee80211_node_table *nt, struct ieee80211_node *ni) printf("\thtcap %x htparam %x htctlchan %u ht2ndchan %u\n", ni->ni_htcap, ni->ni_htparam, ni->ni_htctlchan, ni->ni_ht2ndchan); - printf("\thtopmode %x htstbc %x chw %u\n", + printf("\thtopmode %x htstbc %x htchw %u\n", ni->ni_htopmode, ni->ni_htstbc, ni->ni_chw); + printf("\tvhtcap %x freq1 %d freq2 %d vhtbasicmcs %x\n", + ni->ni_vhtcap, (int) ni->ni_vht_chan1, (int) ni->ni_vht_chan2, + (int) ni->ni_vht_basicmcs); + /* XXX VHT state */ } void @@ -2596,6 +2735,8 @@ ieee80211_node_join(struct ieee80211_node *ni, int resp) if (IEEE80211_IS_CHAN_HT(ic->ic_bsschan)) ieee80211_ht_node_join(ni); + if (IEEE80211_IS_CHAN_VHT(ic->ic_bsschan)) + ieee80211_vht_node_join(ni); if (IEEE80211_IS_CHAN_ANYG(ic->ic_bsschan) && IEEE80211_IS_CHAN_FULL(ic->ic_bsschan)) ieee80211_node_join_11g(ni); @@ -2605,6 +2746,9 @@ ieee80211_node_join(struct ieee80211_node *ni, int resp) } else newassoc = 0; + /* + * XXX VHT - should log VHT channel width, etc + */ IEEE80211_NOTE(vap, IEEE80211_MSG_ASSOC | IEEE80211_MSG_DEBUG, ni, 
"station associated at aid %d: %s preamble, %s slot time%s%s%s%s%s%s%s%s", IEEE80211_NODE_AID(ni), @@ -2612,6 +2756,7 @@ ieee80211_node_join(struct ieee80211_node *ni, int resp) ic->ic_flags & IEEE80211_F_SHSLOT ? "short" : "long", ic->ic_flags & IEEE80211_F_USEPROT ? ", protection" : "", ni->ni_flags & IEEE80211_NODE_QOS ? ", QoS" : "", + /* XXX update for VHT string */ ni->ni_flags & IEEE80211_NODE_HT ? (ni->ni_chw == 40 ? ", HT40" : ", HT20") : "", ni->ni_flags & IEEE80211_NODE_AMPDU ? " (+AMPDU)" : "", @@ -2776,6 +2921,8 @@ ieee80211_node_leave(struct ieee80211_node *ni) vap->iv_sta_assoc--; ic->ic_sta_assoc--; + if (IEEE80211_IS_CHAN_VHT(ic->ic_bsschan)) + ieee80211_vht_node_leave(ni); if (IEEE80211_IS_CHAN_HT(ic->ic_bsschan)) ieee80211_ht_node_leave(ni); if (IEEE80211_IS_CHAN_ANYG(ic->ic_bsschan) && diff --git a/freebsd/sys/net80211/ieee80211_node.h b/freebsd/sys/net80211/ieee80211_node.h index 7ca24c18..26c05567 100644 --- a/freebsd/sys/net80211/ieee80211_node.h +++ b/freebsd/sys/net80211/ieee80211_node.h @@ -143,6 +143,7 @@ struct ieee80211_node { #define IEEE80211_NODE_AMSDU_RX 0x040000 /* AMSDU rx enabled */ #define IEEE80211_NODE_AMSDU_TX 0x080000 /* AMSDU tx enabled */ #define IEEE80211_NODE_VHT 0x100000 /* VHT enabled */ +#define IEEE80211_NODE_LDPC 0x200000 /* LDPC enabled */ uint16_t ni_associd; /* association ID */ uint16_t ni_vlan; /* vlan tag */ uint16_t ni_txpower; /* current transmit power */ @@ -248,6 +249,11 @@ struct ieee80211_node { struct ieee80211vap *ni_wdsvap; /* associated WDS vap */ void *ni_rctls; /* private ratectl state */ + + /* quiet time IE state for the given node */ + uint32_t ni_quiet_ie_set; /* Quiet time IE was seen */ + struct ieee80211_quiet_ie ni_quiet_ie; /* last seen quiet IE */ + uint64_t ni_spare[3]; }; MALLOC_DECLARE(M_80211_NODE); diff --git a/freebsd/sys/net80211/ieee80211_output.c b/freebsd/sys/net80211/ieee80211_output.c index c9251796..bb42f945 100644 --- a/freebsd/sys/net80211/ieee80211_output.c +++ 
b/freebsd/sys/net80211/ieee80211_output.c @@ -60,6 +60,7 @@ __FBSDID("$FreeBSD$"); #endif #include #include +#include #if defined(INET) || defined(INET6) #include @@ -123,9 +124,7 @@ ieee80211_vap_pkt_send_dest(struct ieee80211vap *vap, struct mbuf *m, { struct ieee80211com *ic = vap->iv_ic; struct ifnet *ifp = vap->iv_ifp; -#ifdef IEEE80211_SUPPORT_SUPERG int mcast; -#endif if ((ni->ni_flags & IEEE80211_NODE_PWR_MGT) && (m->m_flags & M_PWR_SAV) == 0) { @@ -165,9 +164,7 @@ ieee80211_vap_pkt_send_dest(struct ieee80211vap *vap, struct mbuf *m, * interface it (might have been) received on. */ m->m_pkthdr.rcvif = (void *)ni; -#ifdef IEEE80211_SUPPORT_SUPERG mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1: 0; -#endif BPF_MTAP(ifp, m); /* 802.3 tx */ @@ -182,10 +179,15 @@ ieee80211_vap_pkt_send_dest(struct ieee80211vap *vap, struct mbuf *m, * The default ic_ampdu_enable routine handles staggering * ADDBA requests in case the receiver NAK's us or we are * otherwise unable to establish a BA stream. + * + * Don't treat group-addressed frames as candidates for aggregation; + * net80211 doesn't support 802.11aa-2012 and so group addressed + * frames will always have sequence numbers allocated from the NON_QOS + * TID. */ if ((ni->ni_flags & IEEE80211_NODE_AMPDU_TX) && (vap->iv_flags_ht & IEEE80211_FHT_AMPDU_TX)) { - if ((m->m_flags & M_EAPOL) == 0) { + if ((m->m_flags & M_EAPOL) == 0 && (! mcast)) { int tid = WME_AC_TO_TID(M_WME_GETAC(m)); struct ieee80211_tx_ampdu *tap = &ni->ni_tx_ampdu[tid]; @@ -766,13 +768,31 @@ ieee80211_send_setup( } *(uint16_t *)&wh->i_dur[0] = 0; + /* + * XXX TODO: this is what the TX lock is for. + * Here we're incrementing sequence numbers, and they + * need to be in lock-step with what the driver is doing + * both in TX ordering and crypto encap (IV increment.) + * + * If the driver does seqno itself, then we can skip + * assigning sequence numbers here, and we can avoid + * requiring the TX lock. 
+ */ tap = &ni->ni_tx_ampdu[tid]; - if (tid != IEEE80211_NONQOS_TID && IEEE80211_AMPDU_RUNNING(tap)) + if (tid != IEEE80211_NONQOS_TID && IEEE80211_AMPDU_RUNNING(tap)) { m->m_flags |= M_AMPDU_MPDU; - else { + } else { if (IEEE80211_HAS_SEQ(type & IEEE80211_FC0_TYPE_MASK, type & IEEE80211_FC0_SUBTYPE_MASK)) - seqno = ni->ni_txseqs[tid]++; + /* + * 802.11-2012 9.3.2.10 - QoS multicast frames + * come out of a different seqno space. + */ + if (IEEE80211_IS_MULTICAST(wh->i_addr1)) { + seqno = ni->ni_txseqs[IEEE80211_NONQOS_TID]++; + } else { + seqno = ni->ni_txseqs[tid]++; + } else seqno = 0; @@ -1230,7 +1250,7 @@ ieee80211_encap(struct ieee80211vap *vap, struct ieee80211_node *ni, struct ieee80211_frame *wh; struct ieee80211_key *key; struct llc *llc; - int hdrsize, hdrspace, datalen, addqos, txfrag, is4addr; + int hdrsize, hdrspace, datalen, addqos, txfrag, is4addr, is_mcast; ieee80211_seq seqno; int meshhdrsize, meshae; uint8_t *qos; @@ -1238,6 +1258,8 @@ ieee80211_encap(struct ieee80211vap *vap, struct ieee80211_node *ni, IEEE80211_TX_LOCK_ASSERT(ic); + is_mcast = !! (m->m_flags & (M_MCAST | M_BCAST)); + /* * Copy existing Ethernet header to a safe place. The * rest of the code assumes it's ok to strip it when @@ -1282,11 +1304,19 @@ ieee80211_encap(struct ieee80211vap *vap, struct ieee80211_node *ni, * ap's require all data frames to be QoS-encapsulated * once negotiated in which case we'll need to make this * configurable. - * NB: mesh data frames are QoS. + * + * Don't send multicast QoS frames. + * Technically multicast frames can be QoS if all stations in the + * BSS are also QoS. + * + * NB: mesh data frames are QoS, including multicast frames. 
*/ - addqos = ((ni->ni_flags & (IEEE80211_NODE_QOS|IEEE80211_NODE_HT)) || + addqos = + (((is_mcast == 0) && (ni->ni_flags & + (IEEE80211_NODE_QOS|IEEE80211_NODE_HT))) || (vap->iv_opmode == IEEE80211_M_MBSS)) && (m->m_flags & M_EAPOL) == 0; + if (addqos) hdrsize = sizeof(struct ieee80211_qosframe); else @@ -1544,7 +1574,28 @@ ieee80211_encap(struct ieee80211vap *vap, struct ieee80211_node *ni, if (is_amsdu) qos[0] |= IEEE80211_QOS_AMSDU; + /* + * XXX TODO TX lock is needed for atomic updates of sequence + * numbers. If the driver does it, then don't do it here; + * and we don't need the TX lock held. + */ if ((m->m_flags & M_AMPDU_MPDU) == 0) { + /* + * 802.11-2012 9.3.2.10 - + * + * If this is a multicast frame then we need + * to ensure that the sequence number comes from + * a separate seqno space and not the TID space. + * + * Otherwise multicast frames may actually cause + * holes in the TX blockack window space and + * upset various things. + */ + if (IEEE80211_IS_MULTICAST(wh->i_addr1)) + seqno = ni->ni_txseqs[IEEE80211_NONQOS_TID]++; + else + seqno = ni->ni_txseqs[tid]++; + /* * NB: don't assign a sequence # to potential * aggregates; we expect this happens at the @@ -1563,6 +1614,11 @@ ieee80211_encap(struct ieee80211vap *vap, struct ieee80211_node *ni, M_SEQNO_SET(m, seqno); } } else { + /* + * XXX TODO TX lock is needed for atomic updates of sequence + * numbers. If the driver does it, then don't do it here; + * and we don't need the TX lock held. + */ seqno = ni->ni_txseqs[IEEE80211_NONQOS_TID]++; *(uint16_t *)wh->i_seq = htole16(seqno << IEEE80211_SEQ_SEQ_SHIFT); @@ -1577,12 +1633,20 @@ ieee80211_encap(struct ieee80211vap *vap, struct ieee80211_node *ni, __func__); } + /* + * Check if xmit fragmentation is required. + * + * If the hardware does fragmentation offload, then don't bother + * doing it here. 
+ */ + if (IEEE80211_CONF_FRAG_OFFLOAD(ic)) + txfrag = 0; + else + txfrag = (m->m_pkthdr.len > vap->iv_fragthreshold && + !IEEE80211_IS_MULTICAST(wh->i_addr1) && + (vap->iv_caps & IEEE80211_C_TXFRAG) && + (m->m_flags & (M_FF | M_AMPDU_MPDU)) == 0); - /* check if xmit fragmentation is required */ - txfrag = (m->m_pkthdr.len > vap->iv_fragthreshold && - !IEEE80211_IS_MULTICAST(wh->i_addr1) && - (vap->iv_caps & IEEE80211_C_TXFRAG) && - (m->m_flags & (M_FF | M_AMPDU_MPDU)) == 0); if (key != NULL) { /* * IEEE 802.1X: send EAPOL frames always in the clear. @@ -1962,16 +2026,23 @@ ieee80211_add_supportedchannels(uint8_t *frm, struct ieee80211com *ic) * Add an 11h Quiet time element to a frame. */ static uint8_t * -ieee80211_add_quiet(uint8_t *frm, struct ieee80211vap *vap) +ieee80211_add_quiet(uint8_t *frm, struct ieee80211vap *vap, int update) { struct ieee80211_quiet_ie *quiet = (struct ieee80211_quiet_ie *) frm; quiet->quiet_ie = IEEE80211_ELEMID_QUIET; quiet->len = 6; - if (vap->iv_quiet_count_value == 1) - vap->iv_quiet_count_value = vap->iv_quiet_count; - else if (vap->iv_quiet_count_value > 1) - vap->iv_quiet_count_value--; + + /* + * Only update every beacon interval - otherwise probe responses + * would update the quiet count value. 
+ */ + if (update) { + if (vap->iv_quiet_count_value == 1) + vap->iv_quiet_count_value = vap->iv_quiet_count; + else if (vap->iv_quiet_count_value > 1) + vap->iv_quiet_count_value--; + } if (vap->iv_quiet_count_value == 0) { /* value 0 is reserved as per 802.11h standerd */ @@ -2113,6 +2184,7 @@ ieee80211_send_probereq(struct ieee80211_node *ni, * [tlv] RSN (optional) * [tlv] extended supported rates * [tlv] HT cap (optional) + * [tlv] VHT cap (optional) * [tlv] WPA (optional) * [tlv] user-specified ie's */ @@ -2121,7 +2193,8 @@ ieee80211_send_probereq(struct ieee80211_node *ni, 2 + IEEE80211_NWID_LEN + 2 + IEEE80211_RATE_SIZE + sizeof(struct ieee80211_ie_htcap) - + sizeof(struct ieee80211_ie_htinfo) + + sizeof(struct ieee80211_ie_vhtcap) + + sizeof(struct ieee80211_ie_htinfo) /* XXX not needed? */ + sizeof(struct ieee80211_ie_wpa) + 2 + (IEEE80211_RATE_MAXSIZE - IEEE80211_RATE_SIZE) + sizeof(struct ieee80211_ie_wpa) @@ -2161,6 +2234,21 @@ ieee80211_send_probereq(struct ieee80211_node *ni, frm = ieee80211_add_htcap_ch(frm, vap, c); } + /* + * XXX TODO: need to figure out what/how to update the + * VHT channel. 
+ */ +#if 0 + (vap->iv_flags_vht & IEEE80211_FVHT_VHT) { + struct ieee80211_channel *c; + + c = ieee80211_ht_adjust_channel(ic, ic->ic_curchan, + vap->iv_flags_ht); + c = ieee80211_vht_adjust_channel(ic, c, vap->iv_flags_vht); + frm = ieee80211_add_vhtcap_ch(frm, vap, c); + } +#endif + frm = ieee80211_add_wpa(frm, vap); if (vap->iv_appie_probereq != NULL) frm = add_appie(frm, vap->iv_appie_probereq); @@ -2370,6 +2458,7 @@ ieee80211_send_mgmt(struct ieee80211_node *ni, int type, int arg) * [4] power capability (optional) * [28] supported channels (optional) * [tlv] HT capabilities + * [tlv] VHT capabilities * [tlv] WME (optional) * [tlv] Vendor OUI HT capabilities (optional) * [tlv] Atheros capabilities (if negotiated) @@ -2387,6 +2476,7 @@ ieee80211_send_mgmt(struct ieee80211_node *ni, int type, int arg) + 2 + 26 + sizeof(struct ieee80211_wme_info) + sizeof(struct ieee80211_ie_htcap) + + sizeof(struct ieee80211_ie_vhtcap) + 4 + sizeof(struct ieee80211_ie_htcap) #ifdef IEEE80211_SUPPORT_SUPERG + sizeof(struct ieee80211_ath_ie) @@ -2451,6 +2541,14 @@ ieee80211_send_mgmt(struct ieee80211_node *ni, int type, int arg) ni->ni_ies.htcap_ie[0] == IEEE80211_ELEMID_HTCAP) { frm = ieee80211_add_htcap(frm, ni); } + + if ((vap->iv_flags_vht & IEEE80211_FVHT_VHT) && + IEEE80211_IS_CHAN_VHT(ni->ni_chan) && + ni->ni_ies.vhtcap_ie != NULL && + ni->ni_ies.vhtcap_ie[0] == IEEE80211_ELEMID_VHT_CAP) { + frm = ieee80211_add_vhtcap(frm, ni); + } + frm = ieee80211_add_wpa(frm, vap); if ((ic->ic_flags & IEEE80211_F_WME) && ni->ni_ies.wme_ie != NULL) @@ -2494,6 +2592,8 @@ ieee80211_send_mgmt(struct ieee80211_node *ni, int type, int arg) * [tlv] extended supported rates * [tlv] HT capabilities (standard, if STA enabled) * [tlv] HT information (standard, if STA enabled) + * [tlv] VHT capabilities (standard, if STA enabled) + * [tlv] VHT information (standard, if STA enabled) * [tlv] WME (if configured and STA enabled) * [tlv] HT capabilities (vendor OUI, if STA enabled) * [tlv] HT information 
(vendor OUI, if STA enabled) @@ -2509,6 +2609,8 @@ ieee80211_send_mgmt(struct ieee80211_node *ni, int type, int arg) + 2 + (IEEE80211_RATE_MAXSIZE - IEEE80211_RATE_SIZE) + sizeof(struct ieee80211_ie_htcap) + 4 + sizeof(struct ieee80211_ie_htinfo) + 4 + + sizeof(struct ieee80211_ie_vhtcap) + + sizeof(struct ieee80211_ie_vht_operation) + sizeof(struct ieee80211_wme_param) #ifdef IEEE80211_SUPPORT_SUPERG + sizeof(struct ieee80211_ath_ie) @@ -2547,6 +2649,10 @@ ieee80211_send_mgmt(struct ieee80211_node *ni, int type, int arg) frm = ieee80211_add_htcap_vendor(frm, ni); frm = ieee80211_add_htinfo_vendor(frm, ni); } + if (ni->ni_flags & IEEE80211_NODE_VHT) { + frm = ieee80211_add_vhtcap(frm, ni); + frm = ieee80211_add_vhtinfo(frm, ni); + } #ifdef IEEE80211_SUPPORT_SUPERG if (IEEE80211_ATH_CAP(vap, ni, IEEE80211_F_ATHEROS)) frm = ieee80211_add_ath(frm, @@ -2629,6 +2735,8 @@ ieee80211_alloc_proberesp(struct ieee80211_node *bss, int legacy) * [tlv] RSN (optional) * [tlv] HT capabilities * [tlv] HT information + * [tlv] VHT capabilities + * [tlv] VHT information * [tlv] WPA (optional) * [tlv] WME (optional) * [tlv] Vendor OUI HT capabilities (optional) @@ -2659,6 +2767,8 @@ ieee80211_alloc_proberesp(struct ieee80211_node *bss, int legacy) + sizeof(struct ieee80211_wme_param) + 4 + sizeof(struct ieee80211_ie_htcap) + 4 + sizeof(struct ieee80211_ie_htinfo) + + sizeof(struct ieee80211_ie_vhtcap) + + sizeof(struct ieee80211_ie_vht_operation) #ifdef IEEE80211_SUPPORT_SUPERG + sizeof(struct ieee80211_ath_ie) #endif @@ -2720,7 +2830,7 @@ ieee80211_alloc_proberesp(struct ieee80211_node *bss, int legacy) if (IEEE80211_IS_CHAN_DFS(ic->ic_bsschan) && (vap->iv_flags_ext & IEEE80211_FEXT_DFS)) { if (vap->iv_quiet) - frm = ieee80211_add_quiet(frm, vap); + frm = ieee80211_add_quiet(frm, vap, 0); } } if (IEEE80211_IS_CHAN_ANYG(bss->ni_chan)) @@ -2738,6 +2848,11 @@ ieee80211_alloc_proberesp(struct ieee80211_node *bss, int legacy) frm = ieee80211_add_htcap(frm, bss); frm = 
ieee80211_add_htinfo(frm, bss); } + if (IEEE80211_IS_CHAN_VHT(bss->ni_chan) && + legacy != IEEE80211_SEND_LEGACY_11B) { + frm = ieee80211_add_vhtcap(frm, bss); + frm = ieee80211_add_vhtinfo(frm, bss); + } frm = ieee80211_add_wpa(frm, vap); if (vap->iv_flags & IEEE80211_F_WME) frm = ieee80211_add_wme_param(frm, &ic->ic_wme); @@ -2948,6 +3063,10 @@ ieee80211_beacon_construct(struct mbuf *m, uint8_t *frm, /* * beacon frame format + * + * TODO: update to 802.11-2012; a lot of stuff has changed; + * vendor extensions should be at the end, etc. + * * [8] time stamp * [2] beacon interval * [2] cabability information @@ -2959,11 +3078,34 @@ ieee80211_beacon_construct(struct mbuf *m, uint8_t *frm, * [tlv] country (optional) * [3] power control (optional) * [5] channel switch announcement (CSA) (optional) + * XXX TODO: Quiet + * XXX TODO: IBSS DFS + * XXX TODO: TPC report * [tlv] extended rate phy (ERP) * [tlv] extended supported rates * [tlv] RSN parameters + * XXX TODO: BSSLOAD + * (XXX EDCA parameter set, QoS capability?) + * XXX TODO: AP channel report + * * [tlv] HT capabilities * [tlv] HT information + * XXX TODO: 20/40 BSS coexistence + * Mesh: + * XXX TODO: Meshid + * XXX TODO: mesh config + * XXX TODO: mesh awake window + * XXX TODO: beacon timing (mesh, etc) + * XXX TODO: MCCAOP Advertisement Overview + * XXX TODO: MCCAOP Advertisement + * XXX TODO: Mesh channel switch parameters + * VHT: + * XXX TODO: VHT capabilities + * XXX TODO: VHT operation + * XXX TODO: VHT transmit power envelope + * XXX TODO: channel switch wrapper element + * XXX TODO: extended BSS load element + * * XXX Vendor-specific OIDs (e.g. 
Atheros) * [tlv] WPA parameters * [tlv] WME parameters @@ -3036,15 +3178,23 @@ ieee80211_beacon_construct(struct mbuf *m, uint8_t *frm, } else bo->bo_csa = frm; + bo->bo_quiet = NULL; if (vap->iv_flags & IEEE80211_F_DOTH) { - bo->bo_quiet = frm; if (IEEE80211_IS_CHAN_DFS(ic->ic_bsschan) && - (vap->iv_flags_ext & IEEE80211_FEXT_DFS)) { - if (vap->iv_quiet) - frm = ieee80211_add_quiet(frm,vap); + (vap->iv_flags_ext & IEEE80211_FEXT_DFS) && + (vap->iv_quiet == 1)) { + /* + * We only insert the quiet IE offset if + * the quiet IE is enabled. Otherwise don't + * put it here or we'll just overwrite + * some other beacon contents. + */ + if (vap->iv_quiet) { + bo->bo_quiet = frm; + frm = ieee80211_add_quiet(frm,vap, 0); + } } - } else - bo->bo_quiet = frm; + } if (IEEE80211_IS_CHAN_ANYG(ni->ni_chan)) { bo->bo_erp = frm; @@ -3057,6 +3207,16 @@ ieee80211_beacon_construct(struct mbuf *m, uint8_t *frm, bo->bo_htinfo = frm; frm = ieee80211_add_htinfo(frm, ni); } + + if (IEEE80211_IS_CHAN_VHT(ni->ni_chan)) { + frm = ieee80211_add_vhtcap(frm, ni); + bo->bo_vhtinfo = frm; + frm = ieee80211_add_vhtinfo(frm, ni); + /* Transmit power envelope */ + /* Channel switch wrapper element */ + /* Extended bss load element */ + } + frm = ieee80211_add_wpa(frm, vap); if (vap->iv_flags & IEEE80211_F_WME) { bo->bo_wme = frm; @@ -3067,6 +3227,7 @@ ieee80211_beacon_construct(struct mbuf *m, uint8_t *frm, frm = ieee80211_add_htcap_vendor(frm, ni); frm = ieee80211_add_htinfo_vendor(frm, ni); } + #ifdef IEEE80211_SUPPORT_SUPERG if (vap->iv_flags & IEEE80211_F_ATHEROS) { bo->bo_ath = frm; @@ -3084,6 +3245,8 @@ ieee80211_beacon_construct(struct mbuf *m, uint8_t *frm, bo->bo_appie_len = vap->iv_appie_beacon->ie_len; frm = add_appie(frm, vap->iv_appie_beacon); } + + /* XXX TODO: move meshid/meshconf up to before vendor extensions? 
*/ #ifdef IEEE80211_SUPPORT_MESH if (vap->iv_opmode == IEEE80211_M_MBSS) { frm = ieee80211_add_meshid(frm, vap); @@ -3110,8 +3273,19 @@ ieee80211_beacon_alloc(struct ieee80211_node *ni) int pktlen; uint8_t *frm; + /* + * Update the "We're putting the quiet IE in the beacon" state. + */ + if (vap->iv_quiet == 1) + vap->iv_flags_ext |= IEEE80211_FEXT_QUIET_IE; + else if (vap->iv_quiet == 0) + vap->iv_flags_ext &= ~IEEE80211_FEXT_QUIET_IE; + /* * beacon frame format + * + * Note: This needs updating for 802.11-2012. + * * [8] time stamp * [2] beacon interval * [2] cabability information @@ -3128,6 +3302,8 @@ ieee80211_beacon_alloc(struct ieee80211_node *ni) * [tlv] RSN parameters * [tlv] HT capabilities * [tlv] HT information + * [tlv] VHT capabilities + * [tlv] VHT operation * [tlv] Vendor OUI HT capabilities (optional) * [tlv] Vendor OUI HT information (optional) * XXX Vendor-specific OIDs (e.g. Atheros) @@ -3159,6 +3335,8 @@ ieee80211_beacon_alloc(struct ieee80211_node *ni) /* XXX conditional? */ + 4+2*sizeof(struct ieee80211_ie_htcap)/* HT caps */ + 4+2*sizeof(struct ieee80211_ie_htinfo)/* HT info */ + + sizeof(struct ieee80211_ie_vhtcap)/* VHT caps */ + + sizeof(struct ieee80211_ie_vht_operation)/* VHT info */ + (vap->iv_caps & IEEE80211_C_WME ? /* WME */ sizeof(struct ieee80211_wme_param) : 0) #ifdef IEEE80211_SUPPORT_SUPERG @@ -3243,7 +3421,52 @@ ieee80211_beacon_update(struct ieee80211_node *ni, struct mbuf *m, int mcast) return 1; /* just assume length changed */ } + /* + * Handle the quiet time element being added and removed. + * Again, for now we just cheat and reconstruct the whole + * beacon - that way the gap is provided as appropriate. + * + * So, track whether we have already added the IE versus + * whether we want to be adding the IE. 
+ */ + if ((vap->iv_flags_ext & IEEE80211_FEXT_QUIET_IE) && + (vap->iv_quiet == 0)) { + /* + * Quiet time beacon IE enabled, but it's disabled; + * recalc + */ + vap->iv_flags_ext &= ~IEEE80211_FEXT_QUIET_IE; + ieee80211_beacon_construct(m, + mtod(m, uint8_t*) + sizeof(struct ieee80211_frame), ni); + /* XXX do WME aggressive mode processing? */ + IEEE80211_UNLOCK(ic); + return 1; /* just assume length changed */ + } + + if (((vap->iv_flags_ext & IEEE80211_FEXT_QUIET_IE) == 0) && + (vap->iv_quiet == 1)) { + /* + * Quiet time beacon IE disabled, but it's now enabled; + * recalc + */ + vap->iv_flags_ext |= IEEE80211_FEXT_QUIET_IE; + ieee80211_beacon_construct(m, + mtod(m, uint8_t*) + sizeof(struct ieee80211_frame), ni); + /* XXX do WME aggressive mode processing? */ + IEEE80211_UNLOCK(ic); + return 1; /* just assume length changed */ + } + wh = mtod(m, struct ieee80211_frame *); + + /* + * XXX TODO Strictly speaking this should be incremented with the TX + * lock held so as to serialise access to the non-qos TID sequence + * number space. + * + * If the driver identifies it does its own TX seqno management then + * we can skip this (and still not do the TX seqno.) + */ seqno = ni->ni_txseqs[IEEE80211_NONQOS_TID]++; *(uint16_t *)&wh->i_seq[0] = htole16(seqno << IEEE80211_SEQ_SEQ_SHIFT); @@ -3349,6 +3572,10 @@ ieee80211_beacon_update(struct ieee80211_node *ni, struct mbuf *m, int mcast) timoff = 0; timlen = 1; } + + /* + * TODO: validate this! 
+ */ if (timlen != bo->bo_tim_len) { /* copy up/down trailer */ int adjust = tie->tim_bitmap+timlen @@ -3359,6 +3586,7 @@ ieee80211_beacon_update(struct ieee80211_node *ni, struct mbuf *m, int mcast) bo->bo_tim_trailer += adjust; bo->bo_erp += adjust; bo->bo_htinfo += adjust; + bo->bo_vhtinfo += adjust; #ifdef IEEE80211_SUPPORT_SUPERG bo->bo_ath += adjust; #endif @@ -3413,6 +3641,7 @@ ieee80211_beacon_update(struct ieee80211_node *ni, struct mbuf *m, int mcast) memmove(&csa[1], csa, bo->bo_csa_trailer_len); bo->bo_erp += sizeof(*csa); bo->bo_htinfo += sizeof(*csa); + bo->bo_vhtinfo += sizeof(*csa); bo->bo_wme += sizeof(*csa); #ifdef IEEE80211_SUPPORT_SUPERG bo->bo_ath += sizeof(*csa); @@ -3436,10 +3665,17 @@ ieee80211_beacon_update(struct ieee80211_node *ni, struct mbuf *m, int mcast) vap->iv_csa_count++; /* NB: don't clear IEEE80211_BEACON_CSA */ } + + /* + * Only add the quiet time IE if we've enabled it + * as appropriate. + */ if (IEEE80211_IS_CHAN_DFS(ic->ic_bsschan) && - (vap->iv_flags_ext & IEEE80211_FEXT_DFS) ){ - if (vap->iv_quiet) - ieee80211_add_quiet(bo->bo_quiet, vap); + (vap->iv_flags_ext & IEEE80211_FEXT_DFS)) { + if (vap->iv_quiet && + (vap->iv_flags_ext & IEEE80211_FEXT_QUIET_IE)) { + ieee80211_add_quiet(bo->bo_quiet, vap, 1); + } } if (isset(bo->bo_flags, IEEE80211_BEACON_ERP)) { /* diff --git a/freebsd/sys/net80211/ieee80211_proto.c b/freebsd/sys/net80211/ieee80211_proto.c index 99a8ac99..8389404b 100644 --- a/freebsd/sys/net80211/ieee80211_proto.c +++ b/freebsd/sys/net80211/ieee80211_proto.c @@ -243,7 +243,7 @@ static void update_mcast(void *, int); static void update_promisc(void *, int); static void update_channel(void *, int); static void update_chw(void *, int); -static void update_wme(void *, int); +static void vap_update_wme(void *, int); static void restart_vaps(void *, int); static void ieee80211_newstate_cb(void *, int); @@ -282,7 +282,6 @@ ieee80211_proto_attach(struct ieee80211com *ic) TASK_INIT(&ic->ic_chan_task, 0, update_channel, 
ic); TASK_INIT(&ic->ic_bmiss_task, 0, beacon_miss, ic); TASK_INIT(&ic->ic_chw_task, 0, update_chw, ic); - TASK_INIT(&ic->ic_wme_task, 0, update_wme, ic); TASK_INIT(&ic->ic_restart_task, 0, restart_vaps, ic); ic->ic_wme.wme_hipri_switch_hysteresis = @@ -340,6 +339,7 @@ ieee80211_proto_vattach(struct ieee80211vap *vap) callout_init(&vap->iv_mgtsend, 1); TASK_INIT(&vap->iv_nstate_task, 0, ieee80211_newstate_cb, vap); TASK_INIT(&vap->iv_swbmiss_task, 0, beacon_swmiss, vap); + TASK_INIT(&vap->iv_wme_task, 0, vap_update_wme, vap); /* * Install default tx rate handling: no fixed rate, lowest * supported rate for mgmt and multicast frames. Default @@ -844,6 +844,9 @@ setbasicrates(struct ieee80211_rateset *rs, [IEEE80211_MODE_11NA] = { 3, { 12, 24, 48 } }, /* NB: mixed b/g */ [IEEE80211_MODE_11NG] = { 4, { 2, 4, 11, 22 } }, + /* NB: mixed b/g */ + [IEEE80211_MODE_VHT_2GHZ] = { 4, { 2, 4, 11, 22 } }, + [IEEE80211_MODE_VHT_5GHZ] = { 3, { 12, 24, 48 } }, }; int i, j; @@ -908,6 +911,8 @@ static const struct phyParamType phyParamForAC_BE[IEEE80211_MODE_MAX] = { [IEEE80211_MODE_QUARTER]= { 3, 4, 6, 0, 0 }, [IEEE80211_MODE_11NA] = { 3, 4, 6, 0, 0 }, [IEEE80211_MODE_11NG] = { 3, 4, 6, 0, 0 }, + [IEEE80211_MODE_VHT_2GHZ] = { 3, 4, 6, 0, 0 }, + [IEEE80211_MODE_VHT_5GHZ] = { 3, 4, 6, 0, 0 }, }; static const struct phyParamType phyParamForAC_BK[IEEE80211_MODE_MAX] = { [IEEE80211_MODE_AUTO] = { 7, 4, 10, 0, 0 }, @@ -922,6 +927,8 @@ static const struct phyParamType phyParamForAC_BK[IEEE80211_MODE_MAX] = { [IEEE80211_MODE_QUARTER]= { 7, 4, 10, 0, 0 }, [IEEE80211_MODE_11NA] = { 7, 4, 10, 0, 0 }, [IEEE80211_MODE_11NG] = { 7, 4, 10, 0, 0 }, + [IEEE80211_MODE_VHT_2GHZ] = { 7, 4, 10, 0, 0 }, + [IEEE80211_MODE_VHT_5GHZ] = { 7, 4, 10, 0, 0 }, }; static const struct phyParamType phyParamForAC_VI[IEEE80211_MODE_MAX] = { [IEEE80211_MODE_AUTO] = { 1, 3, 4, 94, 0 }, @@ -936,6 +943,8 @@ static const struct phyParamType phyParamForAC_VI[IEEE80211_MODE_MAX] = { [IEEE80211_MODE_QUARTER]= { 1, 3, 4, 94, 
0 }, [IEEE80211_MODE_11NA] = { 1, 3, 4, 94, 0 }, [IEEE80211_MODE_11NG] = { 1, 3, 4, 94, 0 }, + [IEEE80211_MODE_VHT_2GHZ] = { 1, 3, 4, 94, 0 }, + [IEEE80211_MODE_VHT_5GHZ] = { 1, 3, 4, 94, 0 }, }; static const struct phyParamType phyParamForAC_VO[IEEE80211_MODE_MAX] = { [IEEE80211_MODE_AUTO] = { 1, 2, 3, 47, 0 }, @@ -950,6 +959,8 @@ static const struct phyParamType phyParamForAC_VO[IEEE80211_MODE_MAX] = { [IEEE80211_MODE_QUARTER]= { 1, 2, 3, 47, 0 }, [IEEE80211_MODE_11NA] = { 1, 2, 3, 47, 0 }, [IEEE80211_MODE_11NG] = { 1, 2, 3, 47, 0 }, + [IEEE80211_MODE_VHT_2GHZ] = { 1, 2, 3, 47, 0 }, + [IEEE80211_MODE_VHT_5GHZ] = { 1, 2, 3, 47, 0 }, }; static const struct phyParamType bssPhyParamForAC_BE[IEEE80211_MODE_MAX] = { @@ -1125,6 +1136,8 @@ ieee80211_wme_updateparams_locked(struct ieee80211vap *vap) [IEEE80211_MODE_QUARTER] = { 2, 4, 10, 64, 0 }, [IEEE80211_MODE_11NA] = { 2, 4, 10, 64, 0 }, /* XXXcheck*/ [IEEE80211_MODE_11NG] = { 2, 4, 10, 64, 0 }, /* XXXcheck*/ + [IEEE80211_MODE_VHT_2GHZ] = { 2, 4, 10, 64, 0 }, /* XXXcheck*/ + [IEEE80211_MODE_VHT_5GHZ] = { 2, 4, 10, 64, 0 }, /* XXXcheck*/ }; struct ieee80211com *ic = vap->iv_ic; struct ieee80211_wme_state *wme = &ic->ic_wme; @@ -1245,6 +1258,8 @@ ieee80211_wme_updateparams_locked(struct ieee80211vap *vap) [IEEE80211_MODE_QUARTER] = 3, [IEEE80211_MODE_11NA] = 3, [IEEE80211_MODE_11NG] = 3, + [IEEE80211_MODE_VHT_2GHZ] = 3, + [IEEE80211_MODE_VHT_5GHZ] = 3, }; chanp = &wme->wme_chanParams.cap_wmeParams[WME_AC_BE]; bssp = &wme->wme_bssChanParams.cap_wmeParams[WME_AC_BE]; @@ -1272,7 +1287,7 @@ ieee80211_wme_updateparams_locked(struct ieee80211vap *vap) } /* schedule the deferred WME update */ - ieee80211_runtask(ic, &ic->ic_wme_task); + ieee80211_runtask(ic, &vap->iv_wme_task); IEEE80211_DPRINTF(vap, IEEE80211_MSG_WME, "%s: WME params updated, cap_info 0x%x\n", __func__, @@ -1337,15 +1352,25 @@ update_chw(void *arg, int npending) ic->ic_update_chw(ic); } +/* + * Deferred WME update. 
+ * + * In preparation for per-VAP WME configuration, call the VAP + * method if the VAP requires it. Otherwise, just call the + * older global method. There isn't a per-VAP WME configuration + * just yet so for now just use the global configuration. + */ static void -update_wme(void *arg, int npending) +vap_update_wme(void *arg, int npending) { - struct ieee80211com *ic = arg; + struct ieee80211vap *vap = arg; + struct ieee80211com *ic = vap->iv_ic; - /* - * XXX should we defer the WME configuration update until now? - */ - ic->ic_wme.wme_update(ic); + if (vap->iv_wme_update != NULL) + vap->iv_wme_update(vap, + ic->ic_wme.wme_chanParams.cap_wmeParams); + else + ic->ic_wme.wme_update(ic); } static void @@ -1372,7 +1397,6 @@ ieee80211_waitfor_parent(struct ieee80211com *ic) ieee80211_draintask(ic, &ic->ic_chan_task); ieee80211_draintask(ic, &ic->ic_bmiss_task); ieee80211_draintask(ic, &ic->ic_chw_task); - ieee80211_draintask(ic, &ic->ic_wme_task); taskqueue_unblock(ic->ic_tq); } diff --git a/freebsd/sys/net80211/ieee80211_proto.h b/freebsd/sys/net80211/ieee80211_proto.h index c4d5e7b0..784179fd 100644 --- a/freebsd/sys/net80211/ieee80211_proto.h +++ b/freebsd/sys/net80211/ieee80211_proto.h @@ -391,6 +391,8 @@ enum { IEEE80211_BEACON_TDMA = 9, /* TDMA Info */ IEEE80211_BEACON_ATH = 10, /* ATH parameters */ IEEE80211_BEACON_MESHCONF = 11, /* Mesh Configuration */ + IEEE80211_BEACON_QUIET = 12, /* Quiet time IE */ + IEEE80211_BEACON_VHTINFO = 13, /* VHT information */ }; int ieee80211_beacon_update(struct ieee80211_node *, struct mbuf *, int mcast); diff --git a/freebsd/sys/net80211/ieee80211_scan_sta.c b/freebsd/sys/net80211/ieee80211_scan_sta.c index faaaf8a3..cbef5cd8 100644 --- a/freebsd/sys/net80211/ieee80211_scan_sta.c +++ b/freebsd/sys/net80211/ieee80211_scan_sta.c @@ -57,6 +57,7 @@ __FBSDID("$FreeBSD$"); #include #endif #include +#include #include @@ -327,14 +328,33 @@ found: } } else ise->se_chan = curchan; + + /* VHT demotion */ + if 
(IEEE80211_IS_CHAN_VHT(ise->se_chan) && sp->vhtcap == NULL) { + IEEE80211_DPRINTF(vap, IEEE80211_MSG_11N, + "%s: demoting VHT->HT %d/0x%08x\n", + __func__, ise->se_chan->ic_freq, ise->se_chan->ic_flags); + /* Demote legacy networks to a non-VHT channel. */ + c = ieee80211_find_channel(ic, ise->se_chan->ic_freq, + ise->se_chan->ic_flags & ~IEEE80211_CHAN_VHT); + KASSERT(c != NULL, + ("no non-VHT channel %u", ise->se_chan->ic_ieee)); + ise->se_chan = c; + } + + /* HT demotion */ if (IEEE80211_IS_CHAN_HT(ise->se_chan) && sp->htcap == NULL) { /* Demote legacy networks to a non-HT channel. */ + IEEE80211_DPRINTF(vap, IEEE80211_MSG_11N, + "%s: demoting HT->legacy %d/0x%08x\n", + __func__, ise->se_chan->ic_freq, ise->se_chan->ic_flags); c = ieee80211_find_channel(ic, ise->se_chan->ic_freq, ise->se_chan->ic_flags & ~IEEE80211_CHAN_HT); KASSERT(c != NULL, ("no legacy channel %u", ise->se_chan->ic_ieee)); ise->se_chan = c; } + ise->se_fhdwell = sp->fhdwell; ise->se_fhindex = sp->fhindex; ise->se_erp = sp->erp; @@ -533,10 +553,11 @@ sweepchannels(struct ieee80211_scan_state *ss, struct ieee80211vap *vap, /* * Ignore dynamic turbo channels; we scan them * in normal mode (i.e. not boosted). Likewise - * for HT channels, they get scanned using + * for HT/VHT channels, they get scanned using * legacy rates. */ - if (IEEE80211_IS_CHAN_DTURBO(c) || IEEE80211_IS_CHAN_HT(c)) + if (IEEE80211_IS_CHAN_DTURBO(c) || IEEE80211_IS_CHAN_HT(c) || + IEEE80211_IS_CHAN_VHT(c)) continue; /* @@ -821,6 +842,9 @@ maxrate(const struct ieee80211_scan_entry *se) * that we assume compatibility/usability has already been checked * so we don't need to (e.g. validate whether privacy is supported). * Used to select the best scan candidate for association in a BSS. + * + * TODO: should we take 11n, 11ac into account when selecting the + * best? Right now it just compares frequency band and RSSI. 
*/ static int sta_compare(const struct sta_entry *a, const struct sta_entry *b) @@ -1628,6 +1652,8 @@ notfound: */ chan = ieee80211_ht_adjust_channel(ic, chan, vap->iv_flags_ht); + chan = ieee80211_vht_adjust_channel(ic, + chan, vap->iv_flags_vht); ieee80211_create_ibss(vap, chan); return 1; } @@ -1659,6 +1685,8 @@ notfound: */ chan = ieee80211_ht_adjust_channel(ic, chan, vap->iv_flags_ht); + chan = ieee80211_vht_adjust_channel(ic, + chan, vap->iv_flags_vht); if (!ieee80211_sta_join(vap, chan, &selbs->base)) goto notfound; return 1; /* terminate scan */ @@ -1778,7 +1806,7 @@ static int ap_end(struct ieee80211_scan_state *ss, struct ieee80211vap *vap) { struct ieee80211com *ic = vap->iv_ic; - struct ieee80211_channel *bestchan; + struct ieee80211_channel *bestchan, *chan; KASSERT(vap->iv_opmode == IEEE80211_M_HOSTAP, ("wrong opmode %u", vap->iv_opmode)); @@ -1810,8 +1838,10 @@ ap_end(struct ieee80211_scan_state *ss, struct ieee80211vap *vap) ss->ss_flags &= ~IEEE80211_SCAN_NOPICK; return 1; } - ieee80211_create_ibss(vap, - ieee80211_ht_adjust_channel(ic, bestchan, vap->iv_flags_ht)); + chan = ieee80211_ht_adjust_channel(ic, bestchan, vap->iv_flags_ht); + chan = ieee80211_vht_adjust_channel(ic, chan, vap->iv_flags_vht); + ieee80211_create_ibss(vap, chan); + return 1; } @@ -1883,10 +1913,14 @@ notfound: IEEE80211_IS_CHAN_RADAR(vap->iv_des_chan)) { struct ieee80211com *ic = vap->iv_ic; + /* XXX VHT */ chan = adhoc_pick_channel(ss, 0); - if (chan != NULL) + if (chan != NULL) { chan = ieee80211_ht_adjust_channel(ic, chan, vap->iv_flags_ht); + chan = ieee80211_vht_adjust_channel(ic, + chan, vap->iv_flags_vht); + } } else chan = vap->iv_des_chan; if (chan != NULL) { diff --git a/freebsd/sys/net80211/ieee80211_sta.c b/freebsd/sys/net80211/ieee80211_sta.c index 68441806..68bbb36b 100644 --- a/freebsd/sys/net80211/ieee80211_sta.c +++ b/freebsd/sys/net80211/ieee80211_sta.c @@ -66,6 +66,7 @@ __FBSDID("$FreeBSD$"); #endif #include #include +#include #define IEEE80211_RATE2MBS(r) 
(((r) & IEEE80211_RATE_VAL) / 2) @@ -1320,6 +1321,27 @@ startbgscan(struct ieee80211vap *vap) ieee80211_time_after(ticks, ic->ic_lastdata + vap->iv_bgscanidle))); } +#ifdef notyet +/* + * Compare two quiet IEs; returns 1 if they are equivalent, 0 otherwise. + * + * The tbttcount isn't checked - that's not part of the configuration. + */ +static int +compare_quiet_ie(const struct ieee80211_quiet_ie *q1, + const struct ieee80211_quiet_ie *q2) +{ + + if (q1->period != q2->period) + return (0); + if (le16dec(&q1->duration) != le16dec(&q2->duration)) + return (0); + if (le16dec(&q1->offset) != le16dec(&q2->offset)) + return (0); + return (1); +} +#endif + static void sta_recv_mgmt(struct ieee80211_node *ni, struct mbuf *m0, int subtype, const struct ieee80211_rx_stats *rxs, @@ -1332,8 +1354,9 @@ sta_recv_mgmt(struct ieee80211_node *ni, struct mbuf *m0, int subtype, struct ieee80211_frame *wh; uint8_t *frm, *efrm; uint8_t *rates, *xrates, *wme, *htcap, *htinfo; + uint8_t *vhtcap, *vhtopmode; uint8_t rate; - int ht_state_change = 0; + int ht_state_change = 0, do_ht = 0; wh = mtod(m0, struct ieee80211_frame *); frm = (uint8_t *)&wh[1]; @@ -1432,12 +1455,43 @@ sta_recv_mgmt(struct ieee80211_node *ni, struct mbuf *m0, int subtype, if (scan.htcap != NULL && scan.htinfo != NULL && (vap->iv_flags_ht & IEEE80211_FHT_HT)) { /* XXX state changes? */ - if (ieee80211_ht_updateparams(ni, + ieee80211_ht_updateparams(ni, + scan.htcap, scan.htinfo); + do_ht = 1; + } + if (scan.vhtcap != NULL && scan.vhtopmode != NULL && + (vap->iv_flags_vht & IEEE80211_FVHT_VHT)) { + /* XXX state changes? */ + ieee80211_vht_updateparams(ni, + scan.vhtcap, scan.vhtopmode); + do_ht = 1; + } + if (do_ht) { + if (ieee80211_ht_updateparams_final(ni, scan.htcap, scan.htinfo)) ht_state_change = 1; } - if (scan.quiet) + + /* + * If we have a quiet time IE then report it up to + * the driver.
+ * + * Otherwise, inform the driver that the quiet time + * IE has disappeared - only do that once rather than + * spamming it each time. + */ + if (scan.quiet) { ic->ic_set_quiet(ni, scan.quiet); + ni->ni_quiet_ie_set = 1; + memcpy(&ni->ni_quiet_ie, scan.quiet, + sizeof(struct ieee80211_quiet_ie)); + } else { + if (ni->ni_quiet_ie_set == 1) + ic->ic_set_quiet(ni, NULL); + ni->ni_quiet_ie_set = 0; + bzero(&ni->ni_quiet_ie, + sizeof(struct ieee80211_quiet_ie)); + } if (scan.tim != NULL) { struct ieee80211_tim_ie *tim = @@ -1662,6 +1716,7 @@ sta_recv_mgmt(struct ieee80211_node *ni, struct mbuf *m0, int subtype, frm += 2; rates = xrates = wme = htcap = htinfo = NULL; + vhtcap = vhtopmode = NULL; while (efrm - frm > 1) { IEEE80211_VERIFY_LENGTH(efrm - frm, frm[1] + 2, return); switch (*frm) { @@ -1695,6 +1750,12 @@ sta_recv_mgmt(struct ieee80211_node *ni, struct mbuf *m0, int subtype, } /* XXX Atheros OUI support */ break; + case IEEE80211_ELEMID_VHT_CAP: + vhtcap = frm; + break; + case IEEE80211_ELEMID_VHT_OPMODE: + vhtopmode = frm; + break; } frm += frm[1] + 2; } @@ -1739,9 +1800,30 @@ sta_recv_mgmt(struct ieee80211_node *ni, struct mbuf *m0, int subtype, (vap->iv_flags_ht & IEEE80211_FHT_HT)) { ieee80211_ht_node_init(ni); ieee80211_ht_updateparams(ni, htcap, htinfo); + + if ((vhtcap != NULL) && (vhtopmode != NULL) && + (vap->iv_flags_vht & IEEE80211_FVHT_VHT)) { + /* + * Log if we get a VHT assoc/reassoc response. + * We aren't ready for 2GHz VHT support.
+ */ + if (IEEE80211_IS_CHAN_2GHZ(ni->ni_chan)) { + printf("%s: peer %6D: VHT on 2GHz, ignoring\n", + __func__, + ni->ni_macaddr, + ":"); + } else { + ieee80211_vht_node_init(ni); + ieee80211_vht_updateparams(ni, vhtcap, vhtopmode); + ieee80211_setup_vht_rates(ni, vhtcap, vhtopmode); + } + } + + ieee80211_ht_updateparams_final(ni, htcap, htinfo); ieee80211_setup_htrates(ni, htcap, IEEE80211_F_JOIN | IEEE80211_F_DOBRS); ieee80211_setup_basic_htrates(ni, htinfo); + ieee80211_node_setuptxparms(ni); ieee80211_ratectl_node_init(ni); } diff --git a/freebsd/sys/net80211/ieee80211_superg.c b/freebsd/sys/net80211/ieee80211_superg.c index 2ca3e61a..4a5f68d5 100644 --- a/freebsd/sys/net80211/ieee80211_superg.c +++ b/freebsd/sys/net80211/ieee80211_superg.c @@ -96,6 +96,15 @@ SYSCTL_PROC(_net_wlan, OID_AUTO, ffagemax, CTLTYPE_INT | CTLFLAG_RW, &ieee80211_ffagemax, 0, ieee80211_sysctl_msecs_ticks, "I", "max hold time for fast-frame staging (ms)"); +static void +ff_age_all(void *arg, int npending) +{ + struct ieee80211com *ic = arg; + + /* XXX cache timer value somewhere (racy) */ + ieee80211_ff_age_all(ic, ieee80211_ffagemax + 1); +} + void ieee80211_superg_attach(struct ieee80211com *ic) { @@ -111,6 +120,7 @@ ieee80211_superg_attach(struct ieee80211com *ic) __func__); return; } + TIMEOUT_TASK_INIT(ic->ic_tq, &sg->ff_qtimer, 0, ff_age_all, ic); ic->ic_superg = sg; /* @@ -124,12 +134,16 @@ ieee80211_superg_attach(struct ieee80211com *ic) void ieee80211_superg_detach(struct ieee80211com *ic) { - IEEE80211_FF_LOCK_DESTROY(ic); if (ic->ic_superg != NULL) { + struct timeout_task *qtask = &ic->ic_superg->ff_qtimer; + + while (taskqueue_cancel_timeout(ic->ic_tq, qtask, NULL) != 0) + taskqueue_drain_timeout(ic->ic_tq, qtask); IEEE80211_FREE(ic->ic_superg, M_80211_VAP); ic->ic_superg = NULL; } + IEEE80211_FF_LOCK_DESTROY(ic); } void @@ -670,8 +684,12 @@ stageq_add(struct ieee80211com *ic, struct ieee80211_stageq *sq, struct mbuf *m) if (sq->tail != NULL) { sq->tail->m_nextpkt = m; age -= 
M_AGE_GET(sq->head); - } else + } else { sq->head = m; + + struct timeout_task *qtask = &ic->ic_superg->ff_qtimer; + taskqueue_enqueue_timeout(ic->ic_tq, qtask, age); + } KASSERT(age >= 0, ("age %d", age)); M_AGE_SET(m, age); m->m_nextpkt = NULL; diff --git a/freebsd/sys/net80211/ieee80211_superg.h b/freebsd/sys/net80211/ieee80211_superg.h index 2f8628c3..2c8a6a0b 100644 --- a/freebsd/sys/net80211/ieee80211_superg.h +++ b/freebsd/sys/net80211/ieee80211_superg.h @@ -66,6 +66,8 @@ struct ieee80211_stageq { struct ieee80211_superg { /* fast-frames staging q */ struct ieee80211_stageq ff_stageq[WME_NUM_AC]; + /* flush queues automatically */ + struct timeout_task ff_qtimer; }; void ieee80211_superg_attach(struct ieee80211com *); diff --git a/freebsd/sys/net80211/ieee80211_tdma.c b/freebsd/sys/net80211/ieee80211_tdma.c index c14ccab5..45c71f20 100644 --- a/freebsd/sys/net80211/ieee80211_tdma.c +++ b/freebsd/sys/net80211/ieee80211_tdma.c @@ -178,6 +178,8 @@ ieee80211_tdma_vattach(struct ieee80211vap *vap) settxparms(vap, IEEE80211_MODE_11NG, TDMA_TXRATE_11NG_DEFAULT); settxparms(vap, IEEE80211_MODE_HALF, TDMA_TXRATE_HALF_DEFAULT); settxparms(vap, IEEE80211_MODE_QUARTER, TDMA_TXRATE_QUARTER_DEFAULT); + settxparms(vap, IEEE80211_MODE_VHT_2GHZ, TDMA_TXRATE_11NG_DEFAULT); + settxparms(vap, IEEE80211_MODE_VHT_5GHZ, TDMA_TXRATE_11NA_DEFAULT); setackpolicy(vap->iv_ic, 1); /* disable ACK's */ diff --git a/freebsd/sys/net80211/ieee80211_var.h b/freebsd/sys/net80211/ieee80211_var.h index 1b73d392..1d806a92 100644 --- a/freebsd/sys/net80211/ieee80211_var.h +++ b/freebsd/sys/net80211/ieee80211_var.h @@ -93,7 +93,13 @@ * says that VHT is supported - and then this macro can be * changed. 
*/ -#define IEEE80211_CONF_VHT(ic) ((ic)->ic_vhtcaps != 0) +#define IEEE80211_CONF_VHT(ic) \ + ((ic)->ic_flags_ext & IEEE80211_FEXT_VHT) + +#define IEEE80211_CONF_SEQNO_OFFLOAD(ic) \ + ((ic)->ic_flags_ext & IEEE80211_FEXT_SEQNO_OFFLOAD) +#define IEEE80211_CONF_FRAG_OFFLOAD(ic) \ + ((ic)->ic_flags_ext & IEEE80211_FEXT_FRAG_OFFLOAD) /* * 802.11 control state is split into a common portion that maps @@ -143,7 +149,6 @@ struct ieee80211com { struct task ic_chan_task; /* deferred channel change */ struct task ic_bmiss_task; /* deferred beacon miss hndlr */ struct task ic_chw_task; /* deferred HT CHW update */ - struct task ic_wme_task; /* deferred WME update */ struct task ic_restart_task; /* deferred device restart */ counter_u64_t ic_ierrors; /* input errors */ @@ -169,6 +174,7 @@ struct ieee80211com { uint16_t ic_holdover; /* PM hold over duration */ uint16_t ic_txpowlimit; /* global tx power limit */ struct ieee80211_rateset ic_sup_rates[IEEE80211_MODE_MAX]; + struct ieee80211_htrateset ic_sup_htrates; /* * Channel state: @@ -550,6 +556,10 @@ struct ieee80211vap { int (*iv_output)(struct ifnet *, struct mbuf *, const struct sockaddr *, struct route *); + int (*iv_wme_update)(struct ieee80211vap *, + const struct wmeParams *wme_params); + struct task iv_wme_task; /* deferred VAP WME update */ + uint64_t iv_spare[6]; }; MALLOC_DECLARE(M_80211_VAP); @@ -566,8 +576,7 @@ MALLOC_DECLARE(M_80211_VAP); #define IEEE80211_F_PRIVACY 0x00000010 /* CONF: privacy enabled */ #define IEEE80211_F_PUREG 0x00000020 /* CONF: 11g w/o 11b sta's */ #define IEEE80211_F_SCAN 0x00000080 /* STATUS: scanning */ -#define IEEE80211_F_ASCAN 0x00000100 /* STATUS: active scan */ -#define IEEE80211_F_SIBSS 0x00000200 /* STATUS: start IBSS */ +/* 0x00000300 reserved */ /* NB: this is intentionally setup to be IEEE80211_CAPINFO_SHORT_SLOTTIME */ #define IEEE80211_F_SHSLOT 0x00000400 /* STATUS: use short slot time*/ #define IEEE80211_F_PMGTON 0x00000800 /* CONF: Power mgmt enable */ @@ -575,8 +584,7 @@ 
MALLOC_DECLARE(M_80211_VAP); #define IEEE80211_F_WME 0x00002000 /* CONF: enable WME use */ #define IEEE80211_F_BGSCAN 0x00004000 /* CONF: bg scan enabled (???)*/ #define IEEE80211_F_SWRETRY 0x00008000 /* CONF: sw tx retry enabled */ -#define IEEE80211_F_TXPOW_FIXED 0x00010000 /* TX Power: fixed rate */ -#define IEEE80211_F_IBSSON 0x00020000 /* CONF: IBSS creation enable */ +/* 0x00030000 reserved */ #define IEEE80211_F_SHPREAMBLE 0x00040000 /* STATUS: use short preamble */ #define IEEE80211_F_DATAPAD 0x00080000 /* CONF: do alignment pad */ #define IEEE80211_F_USEPROT 0x00100000 /* STATUS: protection enabled */ @@ -594,9 +602,9 @@ MALLOC_DECLARE(M_80211_VAP); #define IEEE80211_F_DWDS 0x80000000 /* CONF: Dynamic WDS enabled */ #define IEEE80211_F_BITS \ - "\20\1TURBOP\2COMP\3FF\4BURST\5PRIVACY\6PUREG\10SCAN\11ASCAN\12SIBSS" \ - "\13SHSLOT\14PMGTON\15DESBSSID\16WME\17BGSCAN\20SWRETRY\21TXPOW_FIXED" \ - "\22IBSSON\23SHPREAMBLE\24DATAPAD\25USEPROT\26USERBARKER\27CSAPENDING" \ + "\20\1TURBOP\2COMP\3FF\4BURST\5PRIVACY\6PUREG\10SCAN" \ + "\13SHSLOT\14PMGTON\15DESBSSID\16WME\17BGSCAN\20SWRETRY" \ + "\23SHPREAMBLE\24DATAPAD\25USEPROT\26USERBARKER\27CSAPENDING" \ "\30WPA1\31WPA2\32DROPUNENC\33COUNTERM\34HIDESSID\35NOBRIDG\36PCF" \ "\37DOTH\40DWDS" @@ -629,14 +637,21 @@ MALLOC_DECLARE(M_80211_VAP); #define IEEE80211_FEXT_PROBECHAN 0x00020000 /* CONF: probe passive channel*/ #define IEEE80211_FEXT_UNIQMAC 0x00040000 /* CONF: user or computed mac */ #define IEEE80211_FEXT_SCAN_OFFLOAD 0x00080000 /* CONF: scan is fully offloaded */ +#define IEEE80211_FEXT_SEQNO_OFFLOAD 0x00100000 /* CONF: driver does seqno insertion/allocation */ +#define IEEE80211_FEXT_FRAG_OFFLOAD 0x00200000 /* CONF: hardware does 802.11 fragmentation + assignment */ +#define IEEE80211_FEXT_VHT 0x00400000 /* CONF: VHT support */ +#define IEEE80211_FEXT_QUIET_IE 0x00800000 /* STATUS: quiet IE in a beacon has been added */ #define IEEE80211_FEXT_BITS \ "\20\2INACT\3SCANWAIT\4BGSCAN\5WPS\6TSN\7SCANREQ\10RESUME" \
"\0114ADDR\12NONEPR_PR\13SWBMISS\14DFS\15DOTD\16STATEWAIT\17REINIT" \ - "\20BPF\21WDSLEGACY\22PROBECHAN\23UNIQMAC\24SCAN_OFFLOAD" + "\20BPF\21WDSLEGACY\22PROBECHAN\23UNIQMAC\24SCAN_OFFLOAD\25SEQNO_OFFLOAD" \ + "\26FRAG_OFFLOAD\27VHT\30QUIET_IE" /* ic_flags_ht/iv_flags_ht */ #define IEEE80211_FHT_NONHT_PR 0x00000001 /* STATUS: non-HT sta present */ +#define IEEE80211_FHT_LDPC_TX 0x00010000 /* CONF: LDPC tx enabled */ +#define IEEE80211_FHT_LDPC_RX 0x00020000 /* CONF: LDPC rx enabled */ #define IEEE80211_FHT_GF 0x00040000 /* CONF: Greenfield enabled */ #define IEEE80211_FHT_HT 0x00080000 /* CONF: HT supported */ #define IEEE80211_FHT_AMPDU_TX 0x00100000 /* CONF: A-MPDU tx supported */ @@ -681,6 +696,8 @@ int ieee80211_vap_attach(struct ieee80211vap *, void ieee80211_vap_detach(struct ieee80211vap *); const struct ieee80211_rateset *ieee80211_get_suprates(struct ieee80211com *ic, const struct ieee80211_channel *); +const struct ieee80211_htrateset *ieee80211_get_suphtrates( + struct ieee80211com *, const struct ieee80211_channel *); void ieee80211_announce(struct ieee80211com *); void ieee80211_announce_channels(struct ieee80211com *); void ieee80211_drain(struct ieee80211com *); diff --git a/freebsd/sys/net80211/ieee80211_vht.c b/freebsd/sys/net80211/ieee80211_vht.c new file mode 100644 index 00000000..acd939d3 --- /dev/null +++ b/freebsd/sys/net80211/ieee80211_vht.c @@ -0,0 +1,855 @@ +#include + +/*- + * Copyright (c) 2017 Adrian Chadd + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution.
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#ifdef __FreeBSD__ +__FBSDID("$FreeBSD$"); +#endif + +/* + * IEEE 802.11ac-2013 protocol support. + */ + +#include +#include + +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +#include +#include +#include +#include + +/* define here, used throughout file */ +#define MS(_v, _f) (((_v) & _f) >> _f##_S) +#define SM(_v, _f) (((_v) << _f##_S) & _f) + +#define ADDSHORT(frm, v) do { \ + frm[0] = (v) & 0xff; \ + frm[1] = (v) >> 8; \ + frm += 2; \ +} while (0) +#define ADDWORD(frm, v) do { \ + frm[0] = (v) & 0xff; \ + frm[1] = ((v) >> 8) & 0xff; \ + frm[2] = ((v) >> 16) & 0xff; \ + frm[3] = ((v) >> 24) & 0xff; \ + frm += 4; \ +} while (0) + +/* + * Immediate TODO: + * + * + handle WLAN_ACTION_VHT_OPMODE_NOTIF and other VHT action frames + * + ensure vhtinfo/vhtcap parameters correctly use the negotiated + * capabilities and ratesets + * + group ID management operation + */ + +/* + * XXX TODO: handle WLAN_ACTION_VHT_OPMODE_NOTIF + * + * Look at mac80211/vht.c:ieee80211_vht_handle_opmode() for further details. 
+ */
+
+static int
+vht_recv_action_placeholder(struct ieee80211_node *ni,
+    const struct ieee80211_frame *wh,
+    const uint8_t *frm, const uint8_t *efrm)
+{
+
+#ifdef IEEE80211_DEBUG
+	ieee80211_note(ni->ni_vap, "%s: called; fc=0x%.2x/0x%.2x",
+	    __func__,
+	    wh->i_fc[0],
+	    wh->i_fc[1]);
+#endif
+	return (0);
+}
+
+static int
+vht_send_action_placeholder(struct ieee80211_node *ni,
+    int category, int action, void *arg0)
+{
+
+#ifdef IEEE80211_DEBUG
+	ieee80211_note(ni->ni_vap, "%s: called; category=%d, action=%d",
+	    __func__,
+	    category,
+	    action);
+#endif
+	return (EINVAL);
+}
+
+static void
+ieee80211_vht_init(void)
+{
+
+	ieee80211_recv_action_register(IEEE80211_ACTION_CAT_VHT,
+	    WLAN_ACTION_VHT_COMPRESSED_BF, vht_recv_action_placeholder);
+	ieee80211_recv_action_register(IEEE80211_ACTION_CAT_VHT,
+	    WLAN_ACTION_VHT_GROUPID_MGMT, vht_recv_action_placeholder);
+	ieee80211_recv_action_register(IEEE80211_ACTION_CAT_VHT,
+	    WLAN_ACTION_VHT_OPMODE_NOTIF, vht_recv_action_placeholder);
+
+	ieee80211_send_action_register(IEEE80211_ACTION_CAT_VHT,
+	    WLAN_ACTION_VHT_COMPRESSED_BF, vht_send_action_placeholder);
+	ieee80211_send_action_register(IEEE80211_ACTION_CAT_VHT,
+	    WLAN_ACTION_VHT_GROUPID_MGMT, vht_send_action_placeholder);
+	ieee80211_send_action_register(IEEE80211_ACTION_CAT_VHT,
+	    WLAN_ACTION_VHT_OPMODE_NOTIF, vht_send_action_placeholder);
+}
+
+SYSINIT(wlan_vht, SI_SUB_DRIVERS, SI_ORDER_FIRST, ieee80211_vht_init, NULL);
+
+void
+ieee80211_vht_attach(struct ieee80211com *ic)
+{
+}
+
+void
+ieee80211_vht_detach(struct ieee80211com *ic)
+{
+}
+
+void
+ieee80211_vht_vattach(struct ieee80211vap *vap)
+{
+	struct ieee80211com *ic = vap->iv_ic;
+
+	if (! IEEE80211_CONF_VHT(ic))
+		return;
+
+	vap->iv_vhtcaps = ic->ic_vhtcaps;
+	vap->iv_vhtextcaps = ic->ic_vhtextcaps;
+
+	/* XXX assume VHT80 support; should really check vhtcaps */
+	vap->iv_flags_vht =
+	    IEEE80211_FVHT_VHT
+	    | IEEE80211_FVHT_USEVHT40
+	    | IEEE80211_FVHT_USEVHT80;
+	/* XXX TODO: enable VHT80+80, VHT160 capabilities */
+
+	memcpy(&vap->iv_vht_mcsinfo, &ic->ic_vht_mcsinfo,
+	    sizeof(struct ieee80211_vht_mcs_info));
+}
+
+void
+ieee80211_vht_vdetach(struct ieee80211vap *vap)
+{
+}
+
+#if 0
+static void
+vht_announce(struct ieee80211com *ic, enum ieee80211_phymode mode)
+{
+}
+#endif
+
+static int
+vht_mcs_to_num(int m)
+{
+
+	switch (m) {
+	case IEEE80211_VHT_MCS_SUPPORT_0_7:
+		return (7);
+	case IEEE80211_VHT_MCS_SUPPORT_0_8:
+		return (8);
+	case IEEE80211_VHT_MCS_SUPPORT_0_9:
+		return (9);
+	default:
+		return (0);
+	}
+}
+
+void
+ieee80211_vht_announce(struct ieee80211com *ic)
+{
+	int i, tx, rx;
+
+	if (! IEEE80211_CONF_VHT(ic))
+		return;
+
+	/* Channel width */
+	ic_printf(ic, "[VHT] Channel Widths: 20MHz, 40MHz, 80MHz");
+	if (MS(ic->ic_vhtcaps, IEEE80211_VHTCAP_SUPP_CHAN_WIDTH_MASK) == 2)
+		printf(" 80+80MHz");
+	if (MS(ic->ic_vhtcaps, IEEE80211_VHTCAP_SUPP_CHAN_WIDTH_MASK) >= 1)
+		printf(" 160MHz");
+	printf("\n");
+
+	/* Features */
+	ic_printf(ic, "[VHT] Features: %b\n", ic->ic_vhtcaps,
+	    IEEE80211_VHTCAP_BITS);
+
+	/* For now, just 5GHz VHT.  Worry about 2GHz VHT later */
+	for (i = 0; i < 8; i++) {
+		/* Each stream is 2 bits; the 16-bit maps cover NSS 1..8 */
+		tx = (ic->ic_vht_mcsinfo.tx_mcs_map >> (2*i)) & 0x3;
+		rx = (ic->ic_vht_mcsinfo.rx_mcs_map >> (2*i)) & 0x3;
+		if (tx == 3 && rx == 3)
+			continue;
+		ic_printf(ic, "[VHT] NSS %d: TX MCS 0..%d, RX MCS 0..%d\n",
+		    i + 1,
+		    vht_mcs_to_num(tx),
+		    vht_mcs_to_num(rx));
+	}
+}
+
+void
+ieee80211_vht_node_init(struct ieee80211_node *ni)
+{
+
+	IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_11N, ni,
+	    "%s: called", __func__);
+	ni->ni_flags |= IEEE80211_NODE_VHT;
+}
+
+void
+ieee80211_vht_node_cleanup(struct ieee80211_node *ni)
+{
+
+	IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_11N, ni,
+	    "%s: called", __func__);
+	ni->ni_flags &= ~IEEE80211_NODE_VHT;
+	ni->ni_vhtcap = 0;
+	bzero(&ni->ni_vht_mcsinfo, sizeof(struct ieee80211_vht_mcs_info));
+}
+
+/*
+ * Parse an 802.11ac VHT operation IE.
+ */
+void
+ieee80211_parse_vhtopmode(struct ieee80211_node *ni, const uint8_t *ie)
+{
+	/* vht operation */
+	ni->ni_vht_chanwidth = ie[2];
+	ni->ni_vht_chan1 = ie[3];
+	ni->ni_vht_chan2 = ie[4];
+	ni->ni_vht_basicmcs = le16dec(ie + 5);
+
+#if 0
+	printf("%s: chan1=%d, chan2=%d, chanwidth=%d, basicmcs=0x%04x\n",
+	    __func__,
+	    ni->ni_vht_chan1,
+	    ni->ni_vht_chan2,
+	    ni->ni_vht_chanwidth,
+	    ni->ni_vht_basicmcs);
+#endif
+}
+
+/*
+ * Parse an 802.11ac VHT capability IE.
+ */ +void +ieee80211_parse_vhtcap(struct ieee80211_node *ni, const uint8_t *ie) +{ + + /* vht capability */ + ni->ni_vhtcap = le32dec(ie + 2); + + /* suppmcs */ + ni->ni_vht_mcsinfo.rx_mcs_map = le16dec(ie + 6); + ni->ni_vht_mcsinfo.rx_highest = le16dec(ie + 8); + ni->ni_vht_mcsinfo.tx_mcs_map = le16dec(ie + 10); + ni->ni_vht_mcsinfo.tx_highest = le16dec(ie + 12); +} + +int +ieee80211_vht_updateparams(struct ieee80211_node *ni, + const uint8_t *vhtcap_ie, + const uint8_t *vhtop_ie) +{ + + //printf("%s: called\n", __func__); + + ieee80211_parse_vhtcap(ni, vhtcap_ie); + ieee80211_parse_vhtopmode(ni, vhtop_ie); + return (0); +} + +void +ieee80211_setup_vht_rates(struct ieee80211_node *ni, + const uint8_t *vhtcap_ie, + const uint8_t *vhtop_ie) +{ + + //printf("%s: called\n", __func__); + /* XXX TODO */ +} + +void +ieee80211_vht_timeout(struct ieee80211com *ic) +{ +} + +void +ieee80211_vht_node_join(struct ieee80211_node *ni) +{ + + IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_11N, ni, + "%s: called", __func__); +} + +void +ieee80211_vht_node_leave(struct ieee80211_node *ni) +{ + + IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_11N, ni, + "%s: called", __func__); +} + +/* + * Calculate the VHTCAP IE for a given node. + * + * This includes calculating the capability intersection based on the + * current operating mode and intersection of the TX/RX MCS maps. + * + * The standard only makes it clear about MCS rate negotiation + * and MCS basic rates (which must be a subset of the general + * negotiated rates). It doesn't make it clear that the AP should + * figure out the minimum functional overlap with the STA and + * support that. + * + * Note: this is in host order, not in 802.11 endian order. + * + * TODO: ensure I re-read 9.7.11 Rate Selection for VHT STAs. + * + * TODO: investigate what we should negotiate for MU-MIMO beamforming + * options. + * + * opmode is '1' for "vhtcap as if I'm a STA", 0 otherwise. 
+ */
+void
+ieee80211_vht_get_vhtcap_ie(struct ieee80211_node *ni,
+    struct ieee80211_ie_vhtcap *vhtcap, int opmode)
+{
+	struct ieee80211vap *vap = ni->ni_vap;
+//	struct ieee80211com *ic = vap->iv_ic;
+	uint32_t val, val1, val2;
+	uint32_t new_vhtcap;
+	int i;
+
+	vhtcap->ie = IEEE80211_ELEMID_VHT_CAP;
+	vhtcap->len = sizeof(struct ieee80211_ie_vhtcap) - 2;
+
+	/*
+	 * Capabilities - it depends on whether we are a station
+	 * or not.
+	 */
+	new_vhtcap = 0;
+
+	/*
+	 * Station - use our desired configuration based on
+	 * local config, local device bits and the already-learnt
+	 * vhtcap/vhtinfo IE in the node.
+	 */
+
+	/* Limit MPDU size to the smaller of the two */
+	val2 = val1 = MS(vap->iv_vhtcaps, IEEE80211_VHTCAP_MAX_MPDU_MASK);
+	if (opmode == 1) {
+		val2 = MS(ni->ni_vhtcap, IEEE80211_VHTCAP_MAX_MPDU_MASK);
+	}
+	val = MIN(val1, val2);
+	new_vhtcap |= SM(val, IEEE80211_VHTCAP_MAX_MPDU_MASK);
+
+	/* Limit supp channel config */
+	val2 = val1 = MS(vap->iv_vhtcaps,
+	    IEEE80211_VHTCAP_SUPP_CHAN_WIDTH_MASK);
+	if (opmode == 1) {
+		val2 = MS(ni->ni_vhtcap,
+		    IEEE80211_VHTCAP_SUPP_CHAN_WIDTH_MASK);
+	}
+	if ((val2 == 2) &&
+	    ((vap->iv_flags_vht & IEEE80211_FVHT_USEVHT80P80) == 0))
+		val2 = 1;
+	if ((val2 == 1) &&
+	    ((vap->iv_flags_vht & IEEE80211_FVHT_USEVHT160) == 0))
+		val2 = 0;
+	val = MIN(val1, val2);
+	new_vhtcap |= SM(val, IEEE80211_VHTCAP_SUPP_CHAN_WIDTH_MASK);
+
+	/* RX LDPC */
+	val2 = val1 = MS(vap->iv_vhtcaps, IEEE80211_VHTCAP_RXLDPC);
+	if (opmode == 1) {
+		val2 = MS(ni->ni_vhtcap, IEEE80211_VHTCAP_RXLDPC);
+	}
+	val = MIN(val1, val2);
+	new_vhtcap |= SM(val, IEEE80211_VHTCAP_RXLDPC);
+
+	/* Short-GI 80 */
+	val2 = val1 = MS(vap->iv_vhtcaps, IEEE80211_VHTCAP_SHORT_GI_80);
+	if (opmode == 1) {
+		val2 = MS(ni->ni_vhtcap, IEEE80211_VHTCAP_SHORT_GI_80);
+	}
+	val = MIN(val1, val2);
+	new_vhtcap |= SM(val, IEEE80211_VHTCAP_SHORT_GI_80);
+
+	/* Short-GI 160 */
+	val2 = val1 = MS(vap->iv_vhtcaps, IEEE80211_VHTCAP_SHORT_GI_160);
+	if (opmode == 1) {
+		val2 = MS(ni->ni_vhtcap, IEEE80211_VHTCAP_SHORT_GI_160);
+	}
+	val = MIN(val1, val2);
+	new_vhtcap |= SM(val, IEEE80211_VHTCAP_SHORT_GI_160);
+
+	/*
+	 * STBC is slightly more complicated.
+	 *
+	 * In non-STA mode, we just announce our capabilities and that
+	 * is that.
+	 *
+	 * In STA mode, we should calculate our capabilities based on
+	 * local capabilities /and/ what the remote says. So:
+	 *
+	 * + Only TX STBC if we support it and the remote supports RX STBC;
+	 * + Only announce RX STBC if we support it and the remote supports
+	 *   TX STBC;
+	 * + RX STBC should be the minimum of local and remote RX STBC;
+	 */
+
+	/* TX STBC */
+	val2 = val1 = MS(vap->iv_vhtcaps, IEEE80211_VHTCAP_TXSTBC);
+	if (opmode == 1) {
+		/* STA mode - enable it only if node RXSTBC is non-zero */
+		val2 = !! MS(ni->ni_vhtcap, IEEE80211_VHTCAP_RXSTBC_MASK);
+	}
+	val = MIN(val1, val2);
+	/* XXX For now, use the 11n config flag */
+	if ((vap->iv_flags_ht & IEEE80211_FHT_STBC_TX) == 0)
+		val = 0;
+	new_vhtcap |= SM(val, IEEE80211_VHTCAP_TXSTBC);
+
+	/* RX STBC1..4 */
+	val2 = val1 = MS(vap->iv_vhtcaps, IEEE80211_VHTCAP_RXSTBC_MASK);
+	if (opmode == 1) {
+		/* STA mode - enable it only if node TXSTBC is non-zero */
+		val2 = MS(ni->ni_vhtcap, IEEE80211_VHTCAP_TXSTBC);
+	}
+	val = MIN(val1, val2);
+	/* XXX For now, use the 11n config flag */
+	if ((vap->iv_flags_ht & IEEE80211_FHT_STBC_RX) == 0)
+		val = 0;
+	new_vhtcap |= SM(val, IEEE80211_VHTCAP_RXSTBC_MASK);
+
+	/*
+	 * Finally - if RXSTBC is 0, then don't enable TXSTBC.
+	 * Strictly speaking a device can TXSTBC and not RXSTBC, but
+	 * it would be silly.
+	 */
+	if (val == 0)
+		new_vhtcap &= ~IEEE80211_VHTCAP_TXSTBC;
+
+	/*
+	 * Some of these fields require other fields to exist.
+	 * So before using it, the parent field needs to be checked
+	 * otherwise the overridden value may be wrong.
+	 *
+	 * For example, if SU beamformee is set to 0, then BF STS
+	 * needs to be 0.
+	 */
+
+	/* SU Beamformer capable */
+	val2 = val1 = MS(vap->iv_vhtcaps,
+	    IEEE80211_VHTCAP_SU_BEAMFORMER_CAPABLE);
+	if (opmode == 1) {
+		val2 = MS(ni->ni_vhtcap,
+		    IEEE80211_VHTCAP_SU_BEAMFORMER_CAPABLE);
+	}
+	val = MIN(val1, val2);
+	new_vhtcap |= SM(val, IEEE80211_VHTCAP_SU_BEAMFORMER_CAPABLE);
+
+	/* SU Beamformee capable */
+	val2 = val1 = MS(vap->iv_vhtcaps,
+	    IEEE80211_VHTCAP_SU_BEAMFORMEE_CAPABLE);
+	if (opmode == 1) {
+		val2 = MS(ni->ni_vhtcap,
+		    IEEE80211_VHTCAP_SU_BEAMFORMEE_CAPABLE);
+	}
+	val = MIN(val1, val2);
+	new_vhtcap |= SM(val, IEEE80211_VHTCAP_SU_BEAMFORMEE_CAPABLE);
+
+	/* Beamformee STS capability - only if SU beamformee capable */
+	val2 = val1 = MS(vap->iv_vhtcaps, IEEE80211_VHTCAP_BEAMFORMEE_STS_MASK);
+	if (opmode == 1) {
+		val2 = MS(ni->ni_vhtcap, IEEE80211_VHTCAP_BEAMFORMEE_STS_MASK);
+	}
+	val = MIN(val1, val2);
+	if ((new_vhtcap & IEEE80211_VHTCAP_SU_BEAMFORMEE_CAPABLE) == 0)
+		val = 0;
+	new_vhtcap |= SM(val, IEEE80211_VHTCAP_BEAMFORMEE_STS_MASK);
+
+	/* Sounding dimensions - only if SU beamformer capable */
+	val2 = val1 = MS(vap->iv_vhtcaps,
+	    IEEE80211_VHTCAP_SOUNDING_DIMENSIONS_MASK);
+	if (opmode == 1)
+		val2 = MS(ni->ni_vhtcap,
+		    IEEE80211_VHTCAP_SOUNDING_DIMENSIONS_MASK);
+	val = MIN(val1, val2);
+	if ((new_vhtcap & IEEE80211_VHTCAP_SU_BEAMFORMER_CAPABLE) == 0)
+		val = 0;
+	new_vhtcap |= SM(val, IEEE80211_VHTCAP_SOUNDING_DIMENSIONS_MASK);
+
+	/*
+	 * MU Beamformer capable - only if SU BFF capable, MU BFF capable
+	 * and STA (not AP)
+	 */
+	val2 = val1 = MS(vap->iv_vhtcaps,
+	    IEEE80211_VHTCAP_MU_BEAMFORMER_CAPABLE);
+	if (opmode == 1)
+		val2 = MS(ni->ni_vhtcap,
+		    IEEE80211_VHTCAP_MU_BEAMFORMER_CAPABLE);
+	val = MIN(val1, val2);
+	if ((new_vhtcap & IEEE80211_VHTCAP_SU_BEAMFORMER_CAPABLE) == 0)
+		val = 0;
+	if (opmode != 1)	/* Only enable for STA mode */
+		val = 0;
+	new_vhtcap |= SM(val, IEEE80211_VHTCAP_MU_BEAMFORMER_CAPABLE);
+
+	/*
+	 * MU Beamformee capable - only if SU BFE capable, MU BFE capable
+	 * and AP (not STA)
+	 */
+	val2 = val1 = MS(vap->iv_vhtcaps,
+	    IEEE80211_VHTCAP_MU_BEAMFORMEE_CAPABLE);
+	if (opmode == 1)
+		val2 = MS(ni->ni_vhtcap,
+		    IEEE80211_VHTCAP_MU_BEAMFORMEE_CAPABLE);
+	val = MIN(val1, val2);
+	if ((new_vhtcap & IEEE80211_VHTCAP_SU_BEAMFORMEE_CAPABLE) == 0)
+		val = 0;
+	if (opmode != 0)	/* Only enable for AP mode */
+		val = 0;
+	new_vhtcap |= SM(val, IEEE80211_VHTCAP_MU_BEAMFORMEE_CAPABLE);
+
+	/* VHT TXOP PS */
+	val2 = val1 = MS(vap->iv_vhtcaps, IEEE80211_VHTCAP_VHT_TXOP_PS);
+	if (opmode == 1)
+		val2 = MS(ni->ni_vhtcap, IEEE80211_VHTCAP_VHT_TXOP_PS);
+	val = MIN(val1, val2);
+	new_vhtcap |= SM(val, IEEE80211_VHTCAP_VHT_TXOP_PS);
+
+	/* HTC_VHT */
+	val2 = val1 = MS(vap->iv_vhtcaps, IEEE80211_VHTCAP_HTC_VHT);
+	if (opmode == 1)
+		val2 = MS(ni->ni_vhtcap, IEEE80211_VHTCAP_HTC_VHT);
+	val = MIN(val1, val2);
+	new_vhtcap |= SM(val, IEEE80211_VHTCAP_HTC_VHT);
+
+	/* A-MPDU length max */
+	/* XXX TODO: we need a userland config knob for this */
+	val2 = val1 = MS(vap->iv_vhtcaps,
+	    IEEE80211_VHTCAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK);
+	if (opmode == 1)
+		val2 = MS(ni->ni_vhtcap,
+		    IEEE80211_VHTCAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK);
+	val = MIN(val1, val2);
+	new_vhtcap |= SM(val, IEEE80211_VHTCAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK);
+
+	/*
+	 * Link adaptation is only valid if HTC-VHT capable is 1.
+	 * Otherwise, always set it to 0.
+	 */
+	val2 = val1 = MS(vap->iv_vhtcaps,
+	    IEEE80211_VHTCAP_VHT_LINK_ADAPTATION_VHT_MASK);
+	if (opmode == 1)
+		val2 = MS(ni->ni_vhtcap,
+		    IEEE80211_VHTCAP_VHT_LINK_ADAPTATION_VHT_MASK);
+	val = MIN(val1, val2);
+	if ((new_vhtcap & IEEE80211_VHTCAP_HTC_VHT) == 0)
+		val = 0;
+	new_vhtcap |= SM(val, IEEE80211_VHTCAP_VHT_LINK_ADAPTATION_VHT_MASK);
+
+	/*
+	 * The following two options are 0 if the pattern may change, 1 if it
+	 * does not change.  So, downgrade to the higher value.
+	 */
+
+	/* RX antenna pattern */
+	val2 = val1 = MS(vap->iv_vhtcaps, IEEE80211_VHTCAP_RX_ANTENNA_PATTERN);
+	if (opmode == 1)
+		val2 = MS(ni->ni_vhtcap, IEEE80211_VHTCAP_RX_ANTENNA_PATTERN);
+	val = MAX(val1, val2);
+	new_vhtcap |= SM(val, IEEE80211_VHTCAP_RX_ANTENNA_PATTERN);
+
+	/* TX antenna pattern */
+	val2 = val1 = MS(vap->iv_vhtcaps, IEEE80211_VHTCAP_TX_ANTENNA_PATTERN);
+	if (opmode == 1)
+		val2 = MS(ni->ni_vhtcap, IEEE80211_VHTCAP_TX_ANTENNA_PATTERN);
+	val = MAX(val1, val2);
+	new_vhtcap |= SM(val, IEEE80211_VHTCAP_TX_ANTENNA_PATTERN);
+
+	/*
+	 * MCS set - again, we announce what we want to use
+	 * based on configuration, device capabilities and
+	 * already-learnt vhtcap/vhtinfo IE information.
+	 */
+
+	/* MCS set - start with whatever the device supports */
+	vhtcap->supp_mcs.rx_mcs_map = vap->iv_vht_mcsinfo.rx_mcs_map;
+	vhtcap->supp_mcs.rx_highest = 0;
+	vhtcap->supp_mcs.tx_mcs_map = vap->iv_vht_mcsinfo.tx_mcs_map;
+	vhtcap->supp_mcs.tx_highest = 0;
+
+	vhtcap->vht_cap_info = new_vhtcap;
+
+	/*
+	 * Now, if we're a STA, mask off whatever the AP doesn't support.
+	 * Ie, we continue to state we can receive whatever we can do,
+	 * but we only announce that we will transmit rates that meet
+	 * the AP requirement.
+	 *
+	 * Note: 0 - MCS0..7; 1 - MCS0..8; 2 - MCS0..9; 3 = not supported.
+	 * We can't just use MIN() because '3' means "no", so special case it.
+	 */
+	if (opmode) {
+		for (i = 0; i < 8; i++) {
+			val1 = (vhtcap->supp_mcs.tx_mcs_map >> (i*2)) & 0x3;
+			val2 = (ni->ni_vht_mcsinfo.tx_mcs_map >> (i*2)) & 0x3;
+			val = MIN(val1, val2);
+			if (val1 == 3 || val2 == 3)
+				val = 3;
+			vhtcap->supp_mcs.tx_mcs_map &= ~(0x3 << (i*2));
+			vhtcap->supp_mcs.tx_mcs_map |= (val << (i*2));
+		}
+	}
+}
+
+/*
+ * Add a VHTCAP field.
+ *
+ * If in station mode, we announce what we would like our
+ * desired configuration to be.
+ *
+ * Else, we announce our capabilities based on our current
+ * configuration.
+ */ +uint8_t * +ieee80211_add_vhtcap(uint8_t *frm, struct ieee80211_node *ni) +{ + struct ieee80211_ie_vhtcap vhtcap; + int opmode; + + opmode = 0; + if (ni->ni_vap->iv_opmode == IEEE80211_M_STA) + opmode = 1; + + ieee80211_vht_get_vhtcap_ie(ni, &vhtcap, opmode); + + memset(frm, '\0', sizeof(struct ieee80211_ie_vhtcap)); + + frm[0] = IEEE80211_ELEMID_VHT_CAP; + frm[1] = sizeof(struct ieee80211_ie_vhtcap) - 2; + frm += 2; + + /* 32-bit VHT capability */ + ADDWORD(frm, vhtcap.vht_cap_info); + + /* suppmcs */ + ADDSHORT(frm, vhtcap.supp_mcs.rx_mcs_map); + ADDSHORT(frm, vhtcap.supp_mcs.rx_highest); + ADDSHORT(frm, vhtcap.supp_mcs.tx_mcs_map); + ADDSHORT(frm, vhtcap.supp_mcs.tx_highest); + + return (frm); +} + +static uint8_t +ieee80211_vht_get_chwidth_ie(struct ieee80211_channel *c) +{ + + /* + * XXX TODO: look at the node configuration as + * well? + */ + + if (IEEE80211_IS_CHAN_VHT160(c)) { + return IEEE80211_VHT_CHANWIDTH_160MHZ; + } + if (IEEE80211_IS_CHAN_VHT80_80(c)) { + return IEEE80211_VHT_CHANWIDTH_80P80MHZ; + } + if (IEEE80211_IS_CHAN_VHT80(c)) { + return IEEE80211_VHT_CHANWIDTH_80MHZ; + } + if (IEEE80211_IS_CHAN_VHT40(c)) { + return IEEE80211_VHT_CHANWIDTH_USE_HT; + } + if (IEEE80211_IS_CHAN_VHT20(c)) { + return IEEE80211_VHT_CHANWIDTH_USE_HT; + } + + /* We shouldn't get here */ + printf("%s: called on a non-VHT channel (freq=%d, flags=0x%08x\n", + __func__, + (int) c->ic_freq, + c->ic_flags); + return IEEE80211_VHT_CHANWIDTH_USE_HT; +} + +/* + * Note: this just uses the current channel information; + * it doesn't use the node info after parsing. + * + * XXX TODO: need to make the basic MCS set configurable. + * XXX TODO: read 802.11-2013 to determine what to set + * chwidth to when scanning. I have a feeling + * it isn't involved in scanning and we shouldn't + * be sending it; and I don't yet know what to set + * it to for IBSS or hostap where the peer may be + * a completely different channel width to us. 
+ */
+uint8_t *
+ieee80211_add_vhtinfo(uint8_t *frm, struct ieee80211_node *ni)
+{
+	memset(frm, '\0', sizeof(struct ieee80211_ie_vht_operation));
+
+	frm[0] = IEEE80211_ELEMID_VHT_OPMODE;
+	frm[1] = sizeof(struct ieee80211_ie_vht_operation) - 2;
+	frm += 2;
+
+	/* 8-bit chanwidth */
+	*frm++ = ieee80211_vht_get_chwidth_ie(ni->ni_chan);
+
+	/* 8-bit freq1 */
+	*frm++ = ni->ni_chan->ic_vht_ch_freq1;
+
+	/* 8-bit freq2 */
+	*frm++ = ni->ni_chan->ic_vht_ch_freq2;
+
+	/* 16-bit basic MCS set - just MCS0..7 for NSS=1 for now */
+	ADDSHORT(frm, 0xfffc);
+
+	return (frm);
+}
+
+void
+ieee80211_vht_update_cap(struct ieee80211_node *ni, const uint8_t *vhtcap_ie,
+    const uint8_t *vhtop_ie)
+{
+
+	ieee80211_parse_vhtcap(ni, vhtcap_ie);
+	ieee80211_parse_vhtopmode(ni, vhtop_ie);
+}
+
+static struct ieee80211_channel *
+findvhtchan(struct ieee80211com *ic, struct ieee80211_channel *c, int vhtflags)
+{
+
+	return (ieee80211_find_channel(ic, c->ic_freq,
+	    (c->ic_flags & ~IEEE80211_CHAN_VHT) | vhtflags));
+}
+
+/*
+ * Handle channel promotion to VHT, similar to ieee80211_ht_adjust_channel().
+ */
+struct ieee80211_channel *
+ieee80211_vht_adjust_channel(struct ieee80211com *ic,
+    struct ieee80211_channel *chan, int flags)
+{
+	struct ieee80211_channel *c;
+
+	/* First case - handle channel demotion - if VHT isn't set */
+	if ((flags & IEEE80211_FVHT_VHT) == 0) {
+#if 0
+		printf("%s: demoting channel %d/0x%08x\n", __func__,
+		    chan->ic_ieee, chan->ic_flags);
+#endif
+		c = ieee80211_find_channel(ic, chan->ic_freq,
+		    chan->ic_flags & ~IEEE80211_CHAN_VHT);
+		if (c == NULL)
+			c = chan;
+#if 0
+		printf("%s: .. to %d/0x%08x\n", __func__,
+		    c->ic_ieee, c->ic_flags);
+#endif
+		return (c);
+	}
+
+	/*
+	 * We can upgrade to VHT - attempt to do so
+	 *
+	 * Note: we don't clear the HT flags, these are the hints
+	 * for HT40U/HT40D when selecting VHT40 or larger channels.
+	 */
+	/* Try the widest permitted width first: 160, 80+80, 80, 40 */
+	c = NULL;
+	if ((c == NULL) && (flags & IEEE80211_FVHT_USEVHT160))
+		c = findvhtchan(ic, chan, IEEE80211_CHAN_VHT160);
+
+	if ((c == NULL) && (flags & IEEE80211_FVHT_USEVHT80P80))
+		c = findvhtchan(ic, chan, IEEE80211_CHAN_VHT80_80);
+
+	if ((c == NULL) && (flags & IEEE80211_FVHT_USEVHT80))
+		c = findvhtchan(ic, chan, IEEE80211_CHAN_VHT80);
+
+	if ((c == NULL) && (flags & IEEE80211_FVHT_USEVHT40))
+		c = findvhtchan(ic, chan, IEEE80211_CHAN_VHT40U);
+	if ((c == NULL) && (flags & IEEE80211_FVHT_USEVHT40))
+		c = findvhtchan(ic, chan, IEEE80211_CHAN_VHT40D);
+	/*
+	 * If we get here, VHT20 is always possible because we checked
+	 * for IEEE80211_FVHT_VHT above.
+	 */
+	if (c == NULL)
+		c = findvhtchan(ic, chan, IEEE80211_CHAN_VHT20);
+
+	if (c != NULL)
+		chan = c;
+
+#if 0
+	printf("%s: selected %d/0x%08x\n", __func__, c->ic_ieee, c->ic_flags);
+#endif
+	return (chan);
+}
+
+/*
+ * Calculate the VHT operation IE for a given node.
+ *
+ * This includes calculating the suitable channel width/parameters
+ * and basic MCS set.
+ *
+ * TODO: ensure I read 9.7.11 Rate Selection for VHT STAs.
+ * TODO: ensure I read 10.39.7 - BSS Basic VHT-MCS and NSS set operation.
+ */
+void
+ieee80211_vht_get_vhtinfo_ie(struct ieee80211_node *ni,
+    struct ieee80211_ie_vht_operation *vhtop, int opmode)
+{
+	printf("%s: called; TODO!\n", __func__);
+}
diff --git a/freebsd/sys/net80211/ieee80211_vht.h b/freebsd/sys/net80211/ieee80211_vht.h
new file mode 100644
index 00000000..791762b1
--- /dev/null
+++ b/freebsd/sys/net80211/ieee80211_vht.h
@@ -0,0 +1,68 @@
+/*-
+ * Copyright (c) 2016 Adrian Chadd
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2.
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ +#ifndef _NET80211_IEEE80211_VHT_H_ +#define _NET80211_IEEE80211_VHT_H_ + +void ieee80211_vht_attach(struct ieee80211com *); +void ieee80211_vht_detach(struct ieee80211com *); +void ieee80211_vht_vattach(struct ieee80211vap *); +void ieee80211_vht_vdetach(struct ieee80211vap *); + +void ieee80211_vht_announce(struct ieee80211com *); + +void ieee80211_vht_node_init(struct ieee80211_node *); +void ieee80211_vht_node_cleanup(struct ieee80211_node *); + +void ieee80211_parse_vhtopmode(struct ieee80211_node *, const uint8_t *); +void ieee80211_parse_vhtcap(struct ieee80211_node *, const uint8_t *); + +int ieee80211_vht_updateparams(struct ieee80211_node *, + const uint8_t *, const uint8_t *); +void ieee80211_setup_vht_rates(struct ieee80211_node *, + const uint8_t *, const uint8_t *); + +void ieee80211_vht_timeout(struct ieee80211com *ic); + +void ieee80211_vht_node_join(struct ieee80211_node *ni); +void ieee80211_vht_node_leave(struct ieee80211_node *ni); + +uint8_t * ieee80211_add_vhtcap(uint8_t *frm, 
struct ieee80211_node *); +uint8_t * ieee80211_add_vhtinfo(uint8_t *frm, struct ieee80211_node *); + +void ieee80211_vht_update_cap(struct ieee80211_node *, + const uint8_t *, const uint8_t *); + +struct ieee80211_channel * + ieee80211_vht_adjust_channel(struct ieee80211com *, + struct ieee80211_channel *, int); + +void ieee80211_vht_get_vhtcap_ie(struct ieee80211_node *ni, + struct ieee80211_ie_vhtcap *, int); +void ieee80211_vht_get_vhtinfo_ie(struct ieee80211_node *ni, + struct ieee80211_ie_vht_operation *, int); + +#endif /* _NET80211_IEEE80211_VHT_H_ */ diff --git a/freebsd/sys/netinet/cc/cc.h b/freebsd/sys/netinet/cc/cc.h index 5e61b04b..9b6279de 100644 --- a/freebsd/sys/netinet/cc/cc.h +++ b/freebsd/sys/netinet/cc/cc.h @@ -52,7 +52,7 @@ #define _NETINET_CC_CC_H_ #if !defined(_KERNEL) -#error "no user-servicable parts inside" +#error "no user-serviceable parts inside" #endif /* Global CC vars. */ diff --git a/freebsd/sys/netinet/icmp6.h b/freebsd/sys/netinet/icmp6.h index af35c847..d2e35e42 100644 --- a/freebsd/sys/netinet/icmp6.h +++ b/freebsd/sys/netinet/icmp6.h @@ -42,7 +42,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/netinet/icmp_var.h b/freebsd/sys/netinet/icmp_var.h index d76679f6..565c7d48 100644 --- a/freebsd/sys/netinet/icmp_var.h +++ b/freebsd/sys/netinet/icmp_var.h @@ -10,7 +10,7 @@ * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/netinet/if_ether.c b/freebsd/sys/netinet/if_ether.c index 9fb25c21..d2b0595a 100644 --- a/freebsd/sys/netinet/if_ether.c +++ b/freebsd/sys/netinet/if_ether.c @@ -12,7 +12,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * @@ -466,9 +466,12 @@ arpresolve_full(struct ifnet *ifp, int is_gw, int flags, struct mbuf *m, if (la == NULL && (ifp->if_flags & (IFF_NOARP | IFF_STATICARP)) == 0) { la = lltable_alloc_entry(LLTABLE(ifp), 0, dst); if (la == NULL) { + char addrbuf[INET_ADDRSTRLEN]; + log(LOG_DEBUG, "arpresolve: can't allocate llinfo for %s on %s\n", - inet_ntoa(SIN(dst)->sin_addr), if_name(ifp)); + inet_ntoa_r(SIN(dst)->sin_addr, addrbuf), + if_name(ifp)); m_freem(m); return (EINVAL); } @@ -805,6 +808,7 @@ in_arpinput(struct mbuf *m) size_t linkhdrsize; int lladdr_off; int error; + char addrbuf[INET_ADDRSTRLEN]; sin.sin_len = sizeof(struct sockaddr_in); sin.sin_family = AF_INET; @@ -929,7 +933,7 @@ match: goto drop; /* it's from me, ignore it. 
*/ if (!bcmp(ar_sha(ah), ifp->if_broadcastaddr, ifp->if_addrlen)) { ARP_LOG(LOG_NOTICE, "link address is broadcast for IP address " - "%s!\n", inet_ntoa(isaddr)); + "%s!\n", inet_ntoa_r(isaddr, addrbuf)); goto drop; } @@ -951,7 +955,7 @@ match: myaddr.s_addr != 0) { ARP_LOG(LOG_ERR, "%*D is using my IP address %s on %s!\n", ifp->if_addrlen, (u_char *)ar_sha(ah), ":", - inet_ntoa(isaddr), ifp->if_xname); + inet_ntoa_r(isaddr, addrbuf), ifp->if_xname); itaddr = myaddr; ARPSTAT_INC(dupips); goto reply; @@ -1088,12 +1092,14 @@ reply: if (nh4.nh_ifp != ifp) { ARP_LOG(LOG_INFO, "proxy: ignoring request" " from %s via %s\n", - inet_ntoa(isaddr), ifp->if_xname); + inet_ntoa_r(isaddr, addrbuf), + ifp->if_xname); goto drop; } #ifdef DEBUG_PROXY - printf("arp: proxying for %s\n", inet_ntoa(itaddr)); + printf("arp: proxying for %s\n", + inet_ntoa_r(itaddr, addrbuf)); #endif } } @@ -1103,7 +1109,7 @@ reply: /* RFC 3927 link-local IPv4; always reply by broadcast. */ #ifdef DEBUG_LINKLOCAL printf("arp: sending reply for link-local addr %s\n", - inet_ntoa(itaddr)); + inet_ntoa_r(itaddr, addrbuf)); #endif m->m_flags |= M_BCAST; m->m_flags &= ~M_MCAST; @@ -1164,6 +1170,7 @@ arp_check_update_lle(struct arphdr *ah, struct in_addr isaddr, struct ifnet *ifp uint8_t linkhdr[LLE_MAX_LINKHDR]; size_t linkhdrsize; int lladdr_off; + char addrbuf[INET_ADDRSTRLEN]; LLE_WLOCK_ASSERT(la); @@ -1172,7 +1179,7 @@ arp_check_update_lle(struct arphdr *ah, struct in_addr isaddr, struct ifnet *ifp if (log_arp_wrong_iface) ARP_LOG(LOG_WARNING, "%s is on %s " "but got reply from %*D on %s\n", - inet_ntoa(isaddr), + inet_ntoa_r(isaddr, addrbuf), la->lle_tbl->llt_ifp->if_xname, ifp->if_addrlen, (u_char *)ar_sha(ah), ":", ifp->if_xname); @@ -1189,15 +1196,16 @@ arp_check_update_lle(struct arphdr *ah, struct in_addr isaddr, struct ifnet *ifp "permanent entry for %s on %s\n", ifp->if_addrlen, (u_char *)ar_sha(ah), ":", - inet_ntoa(isaddr), ifp->if_xname); + inet_ntoa_r(isaddr, addrbuf), + ifp->if_xname); 
return; } if (log_arp_movements) { ARP_LOG(LOG_INFO, "%s moved from %*D " "to %*D on %s\n", - inet_ntoa(isaddr), + inet_ntoa_r(isaddr, addrbuf), ifp->if_addrlen, - (u_char *)&la->ll_addr, ":", + (u_char *)la->ll_addr, ":", ifp->if_addrlen, (u_char *)ar_sha(ah), ":", ifp->if_xname); } diff --git a/freebsd/sys/netinet/if_ether.h b/freebsd/sys/netinet/if_ether.h index 27e51f78..b0bc30cf 100644 --- a/freebsd/sys/netinet/if_ether.h +++ b/freebsd/sys/netinet/if_ether.h @@ -10,7 +10,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/netinet/igmp.c b/freebsd/sys/netinet/igmp.c index 12bb2f07..9443bb64 100644 --- a/freebsd/sys/netinet/igmp.c +++ b/freebsd/sys/netinet/igmp.c @@ -17,7 +17,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * @@ -314,17 +314,6 @@ igmp_scrub_context(struct mbuf *m) m->m_pkthdr.flowid = 0; } -#ifdef KTR -static __inline char * -inet_ntoa_haddr(in_addr_t haddr) -{ - struct in_addr ia; - - ia.s_addr = htonl(haddr); - return (inet_ntoa(ia)); -} -#endif - /* * Restore context from a queued IGMP output chain. * Return saved ifindex. 
@@ -874,8 +863,9 @@ igmp_input_v2_query(struct ifnet *ifp, const struct ip *ip, */ inm = inm_lookup(ifp, igmp->igmp_group); if (inm != NULL) { - CTR3(KTR_IGMPV3, "process v2 query %s on ifp %p(%s)", - inet_ntoa(igmp->igmp_group), ifp, ifp->if_xname); + CTR3(KTR_IGMPV3, + "process v2 query 0x%08x on ifp %p(%s)", + ntohl(igmp->igmp_group.s_addr), ifp, ifp->if_xname); igmp_v2_update_group(inm, timer); } } @@ -906,8 +896,8 @@ static void igmp_v2_update_group(struct in_multi *inm, const int timer) { - CTR4(KTR_IGMPV3, "%s: %s/%s timer=%d", __func__, - inet_ntoa(inm->inm_addr), inm->inm_ifp->if_xname, timer); + CTR4(KTR_IGMPV3, "%s: 0x%08x/%s timer=%d", __func__, + ntohl(inm->inm_addr.s_addr), inm->inm_ifp->if_xname, timer); IN_MULTI_LOCK_ASSERT(); @@ -1087,8 +1077,8 @@ igmp_input_v3_query(struct ifnet *ifp, const struct ip *ip, goto out_locked; } } - CTR3(KTR_IGMPV3, "process v3 %s query on ifp %p(%s)", - inet_ntoa(igmpv3->igmp_group), ifp, ifp->if_xname); + CTR3(KTR_IGMPV3, "process v3 0x%08x query on ifp %p(%s)", + ntohl(igmpv3->igmp_group.s_addr), ifp, ifp->if_xname); /* * If there is a pending General Query response * scheduled sooner than the selected delay, no @@ -1248,8 +1238,8 @@ igmp_input_v1_report(struct ifnet *ifp, /*const*/ struct ip *ip, } } - CTR3(KTR_IGMPV3, "process v1 report %s on ifp %p(%s)", - inet_ntoa(igmp->igmp_group), ifp, ifp->if_xname); + CTR3(KTR_IGMPV3, "process v1 report 0x%08x on ifp %p(%s)", + ntohl(igmp->igmp_group.s_addr), ifp, ifp->if_xname); /* * IGMPv1 report suppression. 
@@ -1291,15 +1281,17 @@ igmp_input_v1_report(struct ifnet *ifp, /*const*/ struct ip *ip, case IGMP_LAZY_MEMBER: case IGMP_AWAKENING_MEMBER: CTR3(KTR_IGMPV3, - "report suppressed for %s on ifp %p(%s)", - inet_ntoa(igmp->igmp_group), ifp, ifp->if_xname); + "report suppressed for 0x%08x on ifp %p(%s)", + ntohl(igmp->igmp_group.s_addr), ifp, + ifp->if_xname); case IGMP_SLEEPING_MEMBER: inm->inm_state = IGMP_SLEEPING_MEMBER; break; case IGMP_REPORTING_MEMBER: CTR3(KTR_IGMPV3, - "report suppressed for %s on ifp %p(%s)", - inet_ntoa(igmp->igmp_group), ifp, ifp->if_xname); + "report suppressed for 0x%08x on ifp %p(%s)", + ntohl(igmp->igmp_group.s_addr), ifp, + ifp->if_xname); if (igi->igi_version == IGMP_VERSION_1) inm->inm_state = IGMP_LAZY_MEMBER; else if (igi->igi_version == IGMP_VERSION_2) @@ -1372,8 +1364,8 @@ igmp_input_v2_report(struct ifnet *ifp, /*const*/ struct ip *ip, if (ia != NULL) ifa_free(&ia->ia_ifa); - CTR3(KTR_IGMPV3, "process v2 report %s on ifp %p(%s)", - inet_ntoa(igmp->igmp_group), ifp, ifp->if_xname); + CTR3(KTR_IGMPV3, "process v2 report 0x%08x on ifp %p(%s)", + ntohl(igmp->igmp_group.s_addr), ifp, ifp->if_xname); /* * IGMPv2 report suppression. 
@@ -1413,8 +1405,8 @@ igmp_input_v2_report(struct ifnet *ifp, /*const*/ struct ip *ip, case IGMP_IDLE_MEMBER: case IGMP_AWAKENING_MEMBER: CTR3(KTR_IGMPV3, - "report suppressed for %s on ifp %p(%s)", - inet_ntoa(igmp->igmp_group), ifp, ifp->if_xname); + "report suppressed for 0x%08x on ifp %p(%s)", + ntohl(igmp->igmp_group.s_addr), ifp, ifp->if_xname); case IGMP_LAZY_MEMBER: inm->inm_state = IGMP_LAZY_MEMBER; break; @@ -1901,8 +1893,9 @@ igmp_v3_process_group_timers(struct igmp_ifsoftc *igi, (void)igmp_v3_merge_state_changes(inm, scq); inm_commit(inm); - CTR3(KTR_IGMPV3, "%s: T1 -> T0 for %s/%s", __func__, - inet_ntoa(inm->inm_addr), inm->inm_ifp->if_xname); + CTR3(KTR_IGMPV3, "%s: T1 -> T0 for 0x%08x/%s", __func__, + ntohl(inm->inm_addr.s_addr), + inm->inm_ifp->if_xname); /* * If we are leaving the group for good, make sure @@ -2348,10 +2341,9 @@ igmp_initial_join(struct in_multi *inm, struct igmp_ifsoftc *igi) struct ifnet *ifp; struct mbufq *mq; int error, retval, syncstates; - - CTR4(KTR_IGMPV3, "%s: initial join %s on ifp %p(%s)", - __func__, inet_ntoa(inm->inm_addr), inm->inm_ifp, - inm->inm_ifp->if_xname); + + CTR4(KTR_IGMPV3, "%s: initial join 0x%08x on ifp %p(%s)", __func__, + ntohl(inm->inm_addr.s_addr), inm->inm_ifp, inm->inm_ifp->if_xname); error = 0; syncstates = 1; @@ -2460,8 +2452,8 @@ igmp_initial_join(struct in_multi *inm, struct igmp_ifsoftc *igi) */ if (syncstates) { inm_commit(inm); - CTR3(KTR_IGMPV3, "%s: T1 -> T0 for %s/%s", __func__, - inet_ntoa(inm->inm_addr), inm->inm_ifp->if_xname); + CTR3(KTR_IGMPV3, "%s: T1 -> T0 for 0x%08x/%s", __func__, + ntohl(inm->inm_addr.s_addr), inm->inm_ifp->if_xname); } return (error); @@ -2476,9 +2468,8 @@ igmp_handle_state_change(struct in_multi *inm, struct igmp_ifsoftc *igi) struct ifnet *ifp; int retval; - CTR4(KTR_IGMPV3, "%s: state change for %s on ifp %p(%s)", - __func__, inet_ntoa(inm->inm_addr), inm->inm_ifp, - inm->inm_ifp->if_xname); + CTR4(KTR_IGMPV3, "%s: state change for 0x%08x on ifp %p(%s)", 
__func__, + ntohl(inm->inm_addr.s_addr), inm->inm_ifp, inm->inm_ifp->if_xname); ifp = inm->inm_ifp; @@ -2497,8 +2488,8 @@ igmp_handle_state_change(struct in_multi *inm, struct igmp_ifsoftc *igi) } CTR1(KTR_IGMPV3, "%s: nothing to do", __func__); inm_commit(inm); - CTR3(KTR_IGMPV3, "%s: T1 -> T0 for %s/%s", __func__, - inet_ntoa(inm->inm_addr), inm->inm_ifp->if_xname); + CTR3(KTR_IGMPV3, "%s: T1 -> T0 for 0x%08x/%s", __func__, + ntohl(inm->inm_addr.s_addr), inm->inm_ifp->if_xname); return (0); } @@ -2536,8 +2527,8 @@ igmp_final_leave(struct in_multi *inm, struct igmp_ifsoftc *igi) syncstates = 1; - CTR4(KTR_IGMPV3, "%s: final leave %s on ifp %p(%s)", - __func__, inet_ntoa(inm->inm_addr), inm->inm_ifp, + CTR4(KTR_IGMPV3, "%s: final leave 0x%08x on ifp %p(%s)", + __func__, ntohl(inm->inm_addr.s_addr), inm->inm_ifp, inm->inm_ifp->if_xname); IN_MULTI_LOCK_ASSERT(); @@ -2578,9 +2569,9 @@ igmp_final_leave(struct in_multi *inm, struct igmp_ifsoftc *igi) } else { inm->inm_scrv = igi->igi_rv; } - CTR4(KTR_IGMPV3, "%s: Leaving %s/%s with %d " + CTR4(KTR_IGMPV3, "%s: Leaving 0x%08x/%s with %d " "pending retransmissions.", __func__, - inet_ntoa(inm->inm_addr), + ntohl(inm->inm_addr.s_addr), inm->inm_ifp->if_xname, inm->inm_scrv); if (inm->inm_scrv == 0) { inm->inm_state = IGMP_NOT_MEMBER; @@ -2613,11 +2604,12 @@ igmp_final_leave(struct in_multi *inm, struct igmp_ifsoftc *igi) if (syncstates) { inm_commit(inm); - CTR3(KTR_IGMPV3, "%s: T1 -> T0 for %s/%s", __func__, - inet_ntoa(inm->inm_addr), inm->inm_ifp->if_xname); + CTR3(KTR_IGMPV3, "%s: T1 -> T0 for 0x%08x/%s", __func__, + ntohl(inm->inm_addr.s_addr), inm->inm_ifp->if_xname); inm->inm_st[1].iss_fmode = MCAST_UNDEFINED; - CTR3(KTR_IGMPV3, "%s: T1 now MCAST_UNDEFINED for %s/%s", - __func__, inet_ntoa(inm->inm_addr), inm->inm_ifp->if_xname); + CTR3(KTR_IGMPV3, "%s: T1 now MCAST_UNDEFINED for 0x%08x/%s", + __func__, ntohl(inm->inm_addr.s_addr), + inm->inm_ifp->if_xname); } } @@ -2742,9 +2734,8 @@ 
igmp_v3_enqueue_group_record(struct mbufq *mq, struct in_multi *inm, return (igmp_v3_enqueue_filter_change(mq, inm)); if (type == IGMP_DO_NOTHING) { - CTR3(KTR_IGMPV3, "%s: nothing to do for %s/%s", - __func__, inet_ntoa(inm->inm_addr), - inm->inm_ifp->if_xname); + CTR3(KTR_IGMPV3, "%s: nothing to do for 0x%08x/%s", __func__, + ntohl(inm->inm_addr.s_addr), inm->inm_ifp->if_xname); return (0); } @@ -2757,8 +2748,8 @@ igmp_v3_enqueue_group_record(struct mbufq *mq, struct in_multi *inm, if (record_has_sources) minrec0len += sizeof(in_addr_t); - CTR4(KTR_IGMPV3, "%s: queueing %s for %s/%s", __func__, - igmp_rec_type_to_str(type), inet_ntoa(inm->inm_addr), + CTR4(KTR_IGMPV3, "%s: queueing %s for 0x%08x/%s", __func__, + igmp_rec_type_to_str(type), ntohl(inm->inm_addr.s_addr), inm->inm_ifp->if_xname); /* @@ -2846,8 +2837,8 @@ igmp_v3_enqueue_group_record(struct mbufq *mq, struct in_multi *inm, } msrcs = 0; RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, nims) { - CTR2(KTR_IGMPV3, "%s: visit node %s", __func__, - inet_ntoa_haddr(ims->ims_haddr)); + CTR2(KTR_IGMPV3, "%s: visit node 0x%08x", __func__, + ims->ims_haddr); now = ims_get_mode(inm, ims, 1); CTR2(KTR_IGMPV3, "%s: node is %d", __func__, now); if ((now != mode) || @@ -2942,8 +2933,8 @@ igmp_v3_enqueue_group_record(struct mbufq *mq, struct in_multi *inm, msrcs = 0; RB_FOREACH_FROM(ims, ip_msource_tree, nims) { - CTR2(KTR_IGMPV3, "%s: visit node %s", __func__, - inet_ntoa_haddr(ims->ims_haddr)); + CTR2(KTR_IGMPV3, "%s: visit node 0x%08x", __func__, + ims->ims_haddr); now = ims_get_mode(inm, ims, 1); if ((now != mode) || (now == mode && mode == MCAST_UNDEFINED)) { @@ -3134,8 +3125,8 @@ igmp_v3_enqueue_filter_change(struct mbufq *mq, struct in_multi *inm) if (nims == NULL) nims = RB_MIN(ip_msource_tree, &inm->inm_srcs); RB_FOREACH_FROM(ims, ip_msource_tree, nims) { - CTR2(KTR_IGMPV3, "%s: visit node %s", - __func__, inet_ntoa_haddr(ims->ims_haddr)); + CTR2(KTR_IGMPV3, "%s: visit node 0x%08x", + __func__, 
ims->ims_haddr); now = ims_get_mode(inm, ims, 1); then = ims_get_mode(inm, ims, 0); CTR3(KTR_IGMPV3, "%s: mode: t0 %d, t1 %d", diff --git a/freebsd/sys/netinet/igmp.h b/freebsd/sys/netinet/igmp.h index 8f574290..9d19726a 100644 --- a/freebsd/sys/netinet/igmp.h +++ b/freebsd/sys/netinet/igmp.h @@ -14,7 +14,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/netinet/igmp_var.h b/freebsd/sys/netinet/igmp_var.h index 5242d07d..c2401506 100644 --- a/freebsd/sys/netinet/igmp_var.h +++ b/freebsd/sys/netinet/igmp_var.h @@ -14,7 +14,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/netinet/in.c b/freebsd/sys/netinet/in.c index f08e550b..a02e3034 100644 --- a/freebsd/sys/netinet/in.c +++ b/freebsd/sys/netinet/in.c @@ -13,7 +13,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * @@ -73,7 +73,7 @@ __FBSDID("$FreeBSD$"); #include static int in_aifaddr_ioctl(u_long, caddr_t, struct ifnet *, struct thread *); -static int in_difaddr_ioctl(caddr_t, struct ifnet *, struct thread *); +static int in_difaddr_ioctl(u_long, caddr_t, struct ifnet *, struct thread *); static void in_socktrim(struct sockaddr_in *); static void in_purgemaddrs(struct ifnet *); @@ -247,7 +247,7 @@ in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, break; case SIOCDIFADDR: sx_xlock(&in_control_sx); - error = in_difaddr_ioctl(data, ifp, td); + error = in_difaddr_ioctl(cmd, data, ifp, td); sx_xunlock(&in_control_sx); return (error); #ifndef __rtems__ @@ -394,7 +394,7 @@ in_aifaddr_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp, struct thread *td) IF_ADDR_RUNLOCK(ifp); if (ia != NULL) - (void )in_difaddr_ioctl(data, ifp, td); + (void )in_difaddr_ioctl(cmd, data, ifp, td); ifa = ifa_alloc(sizeof(struct in_ifaddr), M_WAITOK); ia = (struct in_ifaddr *)ifa; @@ -532,7 +532,7 @@ fail2: fail1: if (ia->ia_ifa.ifa_carp) - (*carp_detach_p)(&ia->ia_ifa); + (*carp_detach_p)(&ia->ia_ifa, false); IF_ADDR_WLOCK(ifp); TAILQ_REMOVE(&ifp->if_addrhead, &ia->ia_ifa, ifa_link); @@ -549,7 +549,7 @@ fail1: } static int -in_difaddr_ioctl(caddr_t data, struct ifnet *ifp, struct thread *td) +in_difaddr_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp, struct thread *td) { const struct ifreq *ifr = (struct ifreq *)data; const struct sockaddr_in *addr = (const struct sockaddr_in *) @@ -622,7 +622,8 @@ in_difaddr_ioctl(caddr_t data, struct ifnet *ifp, struct thread *td) in_ifadown(&ia->ia_ifa, 1); if (ia->ia_ifa.ifa_carp) - (*carp_detach_p)(&ia->ia_ifa); + (*carp_detach_p)(&ia->ia_ifa, + (cmd == SIOCDIFADDR) ? 
false : true); /* * If this is the last IPv4 address configured on this @@ -1221,7 +1222,7 @@ in_lltable_rtcheck(struct ifnet *ifp, u_int flags, const struct sockaddr *l3addr */ if (!(rt_flags & RTF_HOST) && info.rti_ifp != ifp) { const char *sa, *mask, *addr, *lim; - int len; + const struct sockaddr_in *l3sin; mask = (const char *)&rt_mask; /* @@ -1233,14 +1234,17 @@ in_lltable_rtcheck(struct ifnet *ifp, u_int flags, const struct sockaddr *l3addr sa = (const char *)&rt_key; addr = (const char *)l3addr; - len = ((const struct sockaddr_in *)l3addr)->sin_len; - lim = addr + len; + l3sin = (const struct sockaddr_in *)l3addr; + lim = addr + l3sin->sin_len; for ( ; addr < lim; sa++, mask++, addr++) { if ((*sa ^ *addr) & *mask) { #ifdef DIAGNOSTIC - log(LOG_INFO, "IPv4 address: \"%s\" is not on the network\n", - inet_ntoa(((const struct sockaddr_in *)l3addr)->sin_addr)); + char addrbuf[INET_ADDRSTRLEN]; + + log(LOG_INFO, "IPv4 address: \"%s\" " + "is not on the network\n", + inet_ntoa_r(l3sin->sin_addr, addrbuf)); #endif return (EINVAL); } diff --git a/freebsd/sys/netinet/in.h b/freebsd/sys/netinet/in.h index b06e3334..6b831344 100644 --- a/freebsd/sys/netinet/in.h +++ b/freebsd/sys/netinet/in.h @@ -10,7 +10,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* @@ -437,6 +437,8 @@ __END_DECLS #define IP_BINDANY 24 /* bool: allow bind to any address */ #define IP_BINDMULTI 25 /* bool: allow multiple listeners on a tuple */ #define IP_RSS_LISTEN_BUCKET 26 /* int; set RSS listen bucket */ +#define IP_ORIGDSTADDR 27 /* bool: receive IP dst addr/port w/dgram */ +#define IP_RECVORIGDSTADDR IP_ORIGDSTADDR /* * Options for controlling the firewall and dummynet. @@ -650,7 +652,6 @@ int in_localaddr(struct in_addr); int in_localip(struct in_addr); int in_ifhasaddr(struct ifnet *, struct in_addr); int inet_aton(const char *, struct in_addr *); /* in libkern */ -char *inet_ntoa(struct in_addr); /* in libkern */ char *inet_ntoa_r(struct in_addr ina, char *buf); /* in libkern */ char *inet_ntop(int, const void *, char *, socklen_t); /* in libkern */ int inet_pton(int af, const char *, void *); /* in libkern */ diff --git a/freebsd/sys/netinet/in_fib.c b/freebsd/sys/netinet/in_fib.c index f1edf976..159b8f28 100644 --- a/freebsd/sys/netinet/in_fib.c +++ b/freebsd/sys/netinet/in_fib.c @@ -12,7 +12,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/netinet/in_fib.h b/freebsd/sys/netinet/in_fib.h index 754a2e3c..fa72fd76 100644 --- a/freebsd/sys/netinet/in_fib.h +++ b/freebsd/sys/netinet/in_fib.h @@ -10,7 +10,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. 
Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/netinet/in_mcast.c b/freebsd/sys/netinet/in_mcast.c index 3d68718e..635b2579 100644 --- a/freebsd/sys/netinet/in_mcast.c +++ b/freebsd/sys/netinet/in_mcast.c @@ -497,9 +497,12 @@ in_getmulti(struct ifnet *ifp, const struct in_addr *group, ("%s: ifma not AF_INET", __func__)); KASSERT(inm != NULL, ("%s: no ifma_protospec", __func__)); if (inm->inm_ifma != ifma || inm->inm_ifp != ifp || - !in_hosteq(inm->inm_addr, *group)) + !in_hosteq(inm->inm_addr, *group)) { + char addrbuf[INET_ADDRSTRLEN]; + panic("%s: ifma %p is inconsistent with %p (%s)", - __func__, ifma, inm, inet_ntoa(*group)); + __func__, ifma, inm, inet_ntoa_r(*group, addrbuf)); + } #endif ++inm->inm_refcount; *pinm = inm; @@ -876,9 +879,6 @@ inm_get_source(struct in_multi *inm, const in_addr_t haddr, { struct ip_msource find; struct ip_msource *ims, *nims; -#ifdef KTR - struct in_addr ia; -#endif find.ims_haddr = haddr; ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find); @@ -894,9 +894,8 @@ inm_get_source(struct in_multi *inm, const in_addr_t haddr, ++inm->inm_nsrc; ims = nims; #ifdef KTR - ia.s_addr = htonl(haddr); - CTR3(KTR_IGMPV3, "%s: allocated %s as %p", __func__, - inet_ntoa(ia), ims); + CTR3(KTR_IGMPV3, "%s: allocated 0x%08x as %p", __func__, + haddr, ims); #endif } @@ -913,29 +912,24 @@ ims_merge(struct ip_msource *ims, const struct in_msource *lims, const int rollback) { int n = rollback ? 
-1 : 1; -#ifdef KTR - struct in_addr ia; - - ia.s_addr = htonl(ims->ims_haddr); -#endif if (lims->imsl_st[0] == MCAST_EXCLUDE) { - CTR3(KTR_IGMPV3, "%s: t1 ex -= %d on %s", - __func__, n, inet_ntoa(ia)); + CTR3(KTR_IGMPV3, "%s: t1 ex -= %d on 0x%08x", + __func__, n, ims->ims_haddr); ims->ims_st[1].ex -= n; } else if (lims->imsl_st[0] == MCAST_INCLUDE) { - CTR3(KTR_IGMPV3, "%s: t1 in -= %d on %s", - __func__, n, inet_ntoa(ia)); + CTR3(KTR_IGMPV3, "%s: t1 in -= %d on 0x%08x", + __func__, n, ims->ims_haddr); ims->ims_st[1].in -= n; } if (lims->imsl_st[1] == MCAST_EXCLUDE) { - CTR3(KTR_IGMPV3, "%s: t1 ex += %d on %s", - __func__, n, inet_ntoa(ia)); + CTR3(KTR_IGMPV3, "%s: t1 ex += %d on 0x%08x", + __func__, n, ims->ims_haddr); ims->ims_st[1].ex += n; } else if (lims->imsl_st[1] == MCAST_INCLUDE) { - CTR3(KTR_IGMPV3, "%s: t1 in += %d on %s", - __func__, n, inet_ntoa(ia)); + CTR3(KTR_IGMPV3, "%s: t1 in += %d on 0x%08x", + __func__, n, ims->ims_haddr); ims->ims_st[1].in += n; } } @@ -1171,8 +1165,8 @@ in_joingroup_locked(struct ifnet *ifp, const struct in_addr *gina, IN_MULTI_LOCK_ASSERT(); - CTR4(KTR_IGMPV3, "%s: join %s on %p(%s))", __func__, - inet_ntoa(*gina), ifp, ifp->if_xname); + CTR4(KTR_IGMPV3, "%s: join 0x%08x on %p(%s))", __func__, + ntohl(gina->s_addr), ifp, ifp->if_xname); error = 0; inm = NULL; @@ -1255,8 +1249,8 @@ in_leavegroup_locked(struct in_multi *inm, /*const*/ struct in_mfilter *imf) IN_MULTI_LOCK_ASSERT(); - CTR5(KTR_IGMPV3, "%s: leave inm %p, %s/%s, imf %p", __func__, - inm, inet_ntoa(inm->inm_addr), + CTR5(KTR_IGMPV3, "%s: leave inm %p, 0x%08x/%s, imf %p", __func__, + inm, ntohl(inm->inm_addr.s_addr), (inm_is_ifp_detached(inm) ? 
"null" : inm->inm_ifp->if_xname), imf); @@ -1304,9 +1298,13 @@ in_addmulti(struct in_addr *ap, struct ifnet *ifp) { struct in_multi *pinm; int error; +#ifdef INVARIANTS + char addrbuf[INET_ADDRSTRLEN]; +#endif KASSERT(IN_LOCAL_GROUP(ntohl(ap->s_addr)), - ("%s: %s not in 224.0.0.0/24", __func__, inet_ntoa(*ap))); + ("%s: %s not in 224.0.0.0/24", __func__, + inet_ntoa_r(*ap, addrbuf))); error = in_joingroup(ifp, ap, NULL, &pinm); if (error != 0) @@ -1385,8 +1383,8 @@ inp_block_unblock_source(struct inpcb *inp, struct sockopt *sopt) if (sopt->sopt_name == IP_BLOCK_SOURCE) doblock = 1; - CTR3(KTR_IGMPV3, "%s: imr_interface = %s, ifp = %p", - __func__, inet_ntoa(mreqs.imr_interface), ifp); + CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p", + __func__, ntohl(mreqs.imr_interface.s_addr), ifp); break; } @@ -1458,8 +1456,8 @@ inp_block_unblock_source(struct inpcb *inp, struct sockopt *sopt) */ ims = imo_match_source(imo, idx, &ssa->sa); if ((ims != NULL && doblock) || (ims == NULL && !doblock)) { - CTR3(KTR_IGMPV3, "%s: source %s %spresent", __func__, - inet_ntoa(ssa->sin.sin_addr), doblock ? "" : "not "); + CTR3(KTR_IGMPV3, "%s: source 0x%08x %spresent", __func__, + ntohl(ssa->sin.sin_addr.s_addr), doblock ? 
"" : "not "); error = EADDRNOTAVAIL; goto out_inp_locked; } @@ -1987,8 +1985,8 @@ inp_join_group(struct inpcb *inp, struct sockopt *sopt) ifp = inp_lookup_mcast_ifp(inp, &gsa->sin, mreqs.imr_interface); - CTR3(KTR_IGMPV3, "%s: imr_interface = %s, ifp = %p", - __func__, inet_ntoa(mreqs.imr_interface), ifp); + CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p", + __func__, ntohl(mreqs.imr_interface.s_addr), ifp); break; } @@ -2288,8 +2286,8 @@ inp_leave_group(struct inpcb *inp, struct sockopt *sopt) if (!in_nullhost(mreqs.imr_interface)) INADDR_TO_IFP(mreqs.imr_interface, ifp); - CTR3(KTR_IGMPV3, "%s: imr_interface = %s, ifp = %p", - __func__, inet_ntoa(mreqs.imr_interface), ifp); + CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p", + __func__, ntohl(mreqs.imr_interface.s_addr), ifp); break; @@ -2369,8 +2367,8 @@ inp_leave_group(struct inpcb *inp, struct sockopt *sopt) } ims = imo_match_source(imo, idx, &ssa->sa); if (ims == NULL) { - CTR3(KTR_IGMPV3, "%s: source %s %spresent", __func__, - inet_ntoa(ssa->sin.sin_addr), "not "); + CTR3(KTR_IGMPV3, "%s: source 0x%08x %spresent", + __func__, ntohl(ssa->sin.sin_addr.s_addr), "not "); error = EADDRNOTAVAIL; goto out_inp_locked; } @@ -2489,8 +2487,8 @@ inp_set_multicast_if(struct inpcb *inp, struct sockopt *sopt) if (ifp == NULL) return (EADDRNOTAVAIL); } - CTR3(KTR_IGMPV3, "%s: ifp = %p, addr = %s", __func__, ifp, - inet_ntoa(addr)); + CTR3(KTR_IGMPV3, "%s: ifp = %p, addr = 0x%08x", __func__, ifp, + ntohl(addr.s_addr)); } /* Reject interfaces which do not support multicast. 
*/ @@ -2867,8 +2865,8 @@ sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS) group.s_addr = name[1]; if (!IN_MULTICAST(ntohl(group.s_addr))) { - CTR2(KTR_IGMPV3, "%s: group %s is not multicast", - __func__, inet_ntoa(group)); + CTR2(KTR_IGMPV3, "%s: group 0x%08x is not multicast", + __func__, ntohl(group.s_addr)); return (EINVAL); } @@ -2899,12 +2897,8 @@ sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS) if (retval != 0) break; RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) { -#ifdef KTR - struct in_addr ina; - ina.s_addr = htonl(ims->ims_haddr); - CTR2(KTR_IGMPV3, "%s: visit node %s", __func__, - inet_ntoa(ina)); -#endif + CTR2(KTR_IGMPV3, "%s: visit node 0x%08x", __func__, + ims->ims_haddr); /* * Only copy-out sources which are in-mode. */ @@ -2967,13 +2961,14 @@ void inm_print(const struct in_multi *inm) { int t; + char addrbuf[INET_ADDRSTRLEN]; if ((ktr_mask & KTR_IGMPV3) == 0) return; printf("%s: --- begin inm %p ---\n", __func__, inm); printf("addr %s ifp %p(%s) ifma %p\n", - inet_ntoa(inm->inm_addr), + inet_ntoa_r(inm->inm_addr, addrbuf), inm->inm_ifp, inm->inm_ifp->if_xname, inm->inm_ifma); diff --git a/freebsd/sys/netinet/in_pcb.c b/freebsd/sys/netinet/in_pcb.c index 809a7de0..e423eed8 100644 --- a/freebsd/sys/netinet/in_pcb.c +++ b/freebsd/sys/netinet/in_pcb.c @@ -18,7 +18,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* @@ -48,6 +48,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -63,6 +64,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -102,11 +104,7 @@ __FBSDID("$FreeBSD$"); #include #endif /* INET6 */ - -#ifdef IPSEC -#include -#include -#endif /* IPSEC */ +#include #include @@ -309,8 +307,8 @@ in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo) goto out; mac_inpcb_create(so, inp); #endif -#ifdef IPSEC - error = ipsec_init_policy(so, &inp->inp_sp); +#if defined(IPSEC) || defined(IPSEC_SUPPORT) + error = ipsec_init_pcbpolicy(inp); if (error != 0) { #ifdef MAC mac_inpcb_destroy(inp); @@ -336,8 +334,14 @@ in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo) #endif inp->inp_gencnt = ++pcbinfo->ipi_gencnt; refcount_init(&inp->inp_refcount, 1); /* Reference from inpcbinfo */ + + /* + * Routes in inpcb's can cache L2 as well; they are guaranteed + * to be cleaned up. + */ + inp->inp_route.ro_flags = RT_LLE_CACHE; INP_LIST_WUNLOCK(pcbinfo); -#if defined(IPSEC) || defined(MAC) +#if defined(IPSEC) || defined(IPSEC_SUPPORT) || defined(MAC) out: if (error != 0) { crfree(inp->inp_cred); @@ -1152,6 +1156,10 @@ in_pcbdetach(struct inpcb *inp) KASSERT(inp->inp_socket != NULL, ("%s: inp_socket == NULL", __func__)); +#ifdef RATELIMIT + if (inp->inp_snd_tag != NULL) + in_pcbdetach_txrtlmt(inp); +#endif inp->inp_socket->so_pcb = NULL; inp->inp_socket = NULL; } @@ -1290,7 +1298,7 @@ in_pcbfree(struct inpcb *inp) INP_WLOCK_ASSERT(inp); /* XXXRW: Do as much as possible here. 
*/ -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) if (inp->inp_sp != NULL) ipsec_delete_pcbpolicy(inp); #endif @@ -2358,7 +2366,7 @@ inp_runlock(struct inpcb *inp) INP_RUNLOCK(inp); } -#ifdef INVARIANTS +#ifdef INVARIANT_SUPPORT void inp_lock_assert(struct inpcb *inp) { @@ -2444,6 +2452,41 @@ so_sototcpcb(struct socket *so) return (sototcpcb(so)); } +/* + * Create an external-format (``xinpcb'') structure using the information in + * the kernel-format in_pcb structure pointed to by inp. This is done to + * reduce the spew of irrelevant information over this interface, to isolate + * user code from changes in the kernel structure, and potentially to provide + * information-hiding if we decide that some of this information should be + * hidden from users. + */ +void +in_pcbtoxinpcb(const struct inpcb *inp, struct xinpcb *xi) +{ + + xi->xi_len = sizeof(struct xinpcb); + if (inp->inp_socket) + sotoxsocket(inp->inp_socket, &xi->xi_socket); + else + bzero(&xi->xi_socket, sizeof(struct xsocket)); + bcopy(&inp->inp_inc, &xi->inp_inc, sizeof(struct in_conninfo)); + xi->inp_gencnt = inp->inp_gencnt; + xi->inp_ppcb = inp->inp_ppcb; + xi->inp_flow = inp->inp_flow; + xi->inp_flowid = inp->inp_flowid; + xi->inp_flowtype = inp->inp_flowtype; + xi->inp_flags = inp->inp_flags; + xi->inp_flags2 = inp->inp_flags2; + xi->inp_rss_listen_bucket = inp->inp_rss_listen_bucket; + xi->in6p_cksum = inp->in6p_cksum; + xi->in6p_hops = inp->in6p_hops; + xi->inp_ip_tos = inp->inp_ip_tos; + xi->inp_vflag = inp->inp_vflag; + xi->inp_ip_ttl = inp->inp_ip_ttl; + xi->inp_ip_p = inp->inp_ip_p; + xi->inp_ip_minttl = inp->inp_ip_minttl; +} + #ifdef DDB static void db_print_indent(int indent) @@ -2502,6 +2545,10 @@ db_print_inpflags(int inp_flags) db_printf("%sINP_RECVDSTADDR", comma ? ", " : ""); comma = 1; } + if (inp_flags & INP_ORIGDSTADDR) { + db_printf("%sINP_ORIGDSTADDR", comma ? ", " : ""); + comma = 1; + } if (inp_flags & INP_HDRINCL) { db_printf("%sINP_HDRINCL", comma ? 
", " : ""); comma = 1; @@ -2689,3 +2736,253 @@ DB_SHOW_COMMAND(inpcb, db_show_inpcb) db_print_inpcb(inp, "inpcb", 0); } #endif /* DDB */ + +#ifdef RATELIMIT +/* + * Modify TX rate limit based on the existing "inp->inp_snd_tag", + * if any. + */ +int +in_pcbmodify_txrtlmt(struct inpcb *inp, uint32_t max_pacing_rate) +{ + union if_snd_tag_modify_params params = { + .rate_limit.max_rate = max_pacing_rate, + }; + struct m_snd_tag *mst; + struct ifnet *ifp; + int error; + + mst = inp->inp_snd_tag; + if (mst == NULL) + return (EINVAL); + + ifp = mst->ifp; + if (ifp == NULL) + return (EINVAL); + + if (ifp->if_snd_tag_modify == NULL) { + error = EOPNOTSUPP; + } else { + error = ifp->if_snd_tag_modify(mst, ¶ms); + } + return (error); +} + +/* + * Query existing TX rate limit based on the existing + * "inp->inp_snd_tag", if any. + */ +int +in_pcbquery_txrtlmt(struct inpcb *inp, uint32_t *p_max_pacing_rate) +{ + union if_snd_tag_query_params params = { }; + struct m_snd_tag *mst; + struct ifnet *ifp; + int error; + + mst = inp->inp_snd_tag; + if (mst == NULL) + return (EINVAL); + + ifp = mst->ifp; + if (ifp == NULL) + return (EINVAL); + + if (ifp->if_snd_tag_query == NULL) { + error = EOPNOTSUPP; + } else { + error = ifp->if_snd_tag_query(mst, ¶ms); + if (error == 0 && p_max_pacing_rate != NULL) + *p_max_pacing_rate = params.rate_limit.max_rate; + } + return (error); +} + +/* + * Allocate a new TX rate limit send tag from the network interface + * given by the "ifp" argument and save it in "inp->inp_snd_tag": + */ +int +in_pcbattach_txrtlmt(struct inpcb *inp, struct ifnet *ifp, + uint32_t flowtype, uint32_t flowid, uint32_t max_pacing_rate) +{ + union if_snd_tag_alloc_params params = { + .rate_limit.hdr.type = IF_SND_TAG_TYPE_RATE_LIMIT, + .rate_limit.hdr.flowid = flowid, + .rate_limit.hdr.flowtype = flowtype, + .rate_limit.max_rate = max_pacing_rate, + }; + int error; + + INP_WLOCK_ASSERT(inp); + + if (inp->inp_snd_tag != NULL) + return (EINVAL); + + if 
(ifp->if_snd_tag_alloc == NULL) { + error = EOPNOTSUPP; + } else { + error = ifp->if_snd_tag_alloc(ifp, &params, &inp->inp_snd_tag); + + /* + * At success increment the refcount on + * the send tag's network interface: + */ + if (error == 0) + if_ref(inp->inp_snd_tag->ifp); + } + return (error); +} + +/* + * Free an existing TX rate limit tag based on the "inp->inp_snd_tag", + * if any: + */ +void +in_pcbdetach_txrtlmt(struct inpcb *inp) +{ + struct m_snd_tag *mst; + struct ifnet *ifp; + + INP_WLOCK_ASSERT(inp); + + mst = inp->inp_snd_tag; + inp->inp_snd_tag = NULL; + + if (mst == NULL) + return; + + ifp = mst->ifp; + if (ifp == NULL) + return; + + /* + * If the device was detached while we still had reference(s) + * on the ifp, we assume if_snd_tag_free() was replaced with + * stubs. + */ + ifp->if_snd_tag_free(mst); + + /* release reference count on network interface */ + if_rele(ifp); +} + +/* + * This function should be called when the INP_RATE_LIMIT_CHANGED flag + * is set in the fast path and will attach/detach/modify the TX rate + * limit send tag based on the socket's so_max_pacing_rate value. + */ +void +in_pcboutput_txrtlmt(struct inpcb *inp, struct ifnet *ifp, struct mbuf *mb) +{ + struct socket *socket; + uint32_t max_pacing_rate; + bool did_upgrade; + int error; + + if (inp == NULL) + return; + + socket = inp->inp_socket; + if (socket == NULL) + return; + + if (!INP_WLOCKED(inp)) { + /* + * NOTE: If the write locking fails, we need to bail + * out and use the non-ratelimited ring for the + * transmit until there is a new chance to get the + * write lock. + */ + if (!INP_TRY_UPGRADE(inp)) + return; + did_upgrade = 1; + } else { + did_upgrade = 0; + } + + /* + * NOTE: The so_max_pacing_rate value is read unlocked, + * because atomic updates are not required since the variable + * is checked at every mbuf we send. It is assumed that the + * variable read itself will be atomic.
+ */ + max_pacing_rate = socket->so_max_pacing_rate; + + /* + * NOTE: When attaching to a network interface a reference is + * made to ensure the network interface doesn't go away until + * all ratelimit connections are gone. The network interface + * pointers compared below represent valid network interfaces, + * except when comparing towards NULL. + */ + if (max_pacing_rate == 0 && inp->inp_snd_tag == NULL) { + error = 0; + } else if (!(ifp->if_capenable & IFCAP_TXRTLMT)) { + if (inp->inp_snd_tag != NULL) + in_pcbdetach_txrtlmt(inp); + error = 0; + } else if (inp->inp_snd_tag == NULL) { + /* + * In order to utilize packet pacing with RSS, we need + * to wait until there is a valid RSS hash before we + * can proceed: + */ + if (M_HASHTYPE_GET(mb) == M_HASHTYPE_NONE) { + error = EAGAIN; + } else { + error = in_pcbattach_txrtlmt(inp, ifp, M_HASHTYPE_GET(mb), + mb->m_pkthdr.flowid, max_pacing_rate); + } + } else { + error = in_pcbmodify_txrtlmt(inp, max_pacing_rate); + } + if (error == 0 || error == EOPNOTSUPP) + inp->inp_flags2 &= ~INP_RATE_LIMIT_CHANGED; + if (did_upgrade) + INP_DOWNGRADE(inp); +} + +/* + * Track route changes for TX rate limiting. + */ +void +in_pcboutput_eagain(struct inpcb *inp) +{ + struct socket *socket; + bool did_upgrade; + + if (inp == NULL) + return; + + socket = inp->inp_socket; + if (socket == NULL) + return; + + if (inp->inp_snd_tag == NULL) + return; + + if (!INP_WLOCKED(inp)) { + /* + * NOTE: If the write locking fails, we need to bail + * out and use the non-ratelimited ring for the + * transmit until there is a new chance to get the + * write lock. 
+ */ + if (!INP_TRY_UPGRADE(inp)) + return; + did_upgrade = 1; + } else { + did_upgrade = 0; + } + + /* detach rate limiting */ + in_pcbdetach_txrtlmt(inp); + + /* make sure new mbuf send tag allocation is made */ + inp->inp_flags2 |= INP_RATE_LIMIT_CHANGED; + + if (did_upgrade) + INP_DOWNGRADE(inp); +} +#endif /* RATELIMIT */ diff --git a/freebsd/sys/netinet/in_pcb.h b/freebsd/sys/netinet/in_pcb.h index ea47d6b2..a3bd23d9 100644 --- a/freebsd/sys/netinet/in_pcb.h +++ b/freebsd/sys/netinet/in_pcb.h @@ -15,7 +15,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * @@ -53,7 +53,6 @@ #define in6pcb inpcb /* for KAME src sync over BSD*'s */ #define in6p_sp inp_sp /* for KAME src sync over BSD*'s */ -struct inpcbpolicy; /* * struct inpcb is the common protocol control block structure used in most @@ -65,7 +64,7 @@ struct inpcbpolicy; */ LIST_HEAD(inpcbhead, inpcb); LIST_HEAD(inpcbporthead, inpcbport); -typedef u_quad_t inp_gen_t; +typedef uint64_t inp_gen_t; /* * PCB with AF_INET6 null bind'ed laddr can receive AF_INET input packet. @@ -130,9 +129,8 @@ struct in_conninfo { #define inc6_laddr inc_ie.ie6_laddr #define inc6_zoneid inc_ie.ie6_zoneid -struct icmp6_filter; - -/*- +#if defined(_KERNEL) || defined(_WANT_INPCB) +/* * struct inpcb captures the network layer state for TCP, UDP, and raw IPv4 and * IPv6 sockets. In the case of TCP and UDP, further per-connection state is * hung off of inp_ppcb most of the time. 
Almost all fields of struct inpcb @@ -181,6 +179,9 @@ struct icmp6_filter; * read-lock usage during modification, this model can be applied to other * protocols (especially SCTP). */ +struct icmp6_filter; +struct inpcbpolicy; +struct m_snd_tag; struct inpcb { LIST_ENTRY(inpcb) inp_hash; /* (h/i) hash list */ LIST_ENTRY(inpcb) inp_pcbgrouphash; /* (g/i) hash list */ @@ -202,11 +203,9 @@ struct inpcb { u_char inp_ip_minttl; /* (i) minimum TTL or drop */ uint32_t inp_flowid; /* (x) flow id / queue id */ u_int inp_refcount; /* (i) refcount */ - void *inp_pspare[5]; /* (x) packet pacing / general use */ + struct m_snd_tag *inp_snd_tag; /* (i) send tag for outgoing mbufs */ uint32_t inp_flowtype; /* (x) M_HASHTYPE value */ uint32_t inp_rss_listen_bucket; /* (x) overridden RSS listen bucket */ - u_int inp_ispare[4]; /* (x) packet pacing / user cookie / - * general use */ /* Local and foreign ports, local and foreign addr. */ struct in_conninfo inp_inc; /* (i) list for PCB's local port */ @@ -217,23 +216,23 @@ struct inpcb { /* Protocol-dependent part; options. 
*/ struct { - u_char inp4_ip_tos; /* (i) type of service proto */ - struct mbuf *inp4_options; /* (i) IP options */ - struct ip_moptions *inp4_moptions; /* (i) IP mcast options */ - } inp_depend4; + u_char inp_ip_tos; /* (i) type of service proto */ + struct mbuf *inp_options; /* (i) IP options */ + struct ip_moptions *inp_moptions; /* (i) mcast options */ + }; struct { /* (i) IP options */ - struct mbuf *inp6_options; + struct mbuf *in6p_options; /* (i) IP6 options for outgoing packets */ - struct ip6_pktopts *inp6_outputopts; + struct ip6_pktopts *in6p_outputopts; /* (i) IP multicast options */ - struct ip6_moptions *inp6_moptions; + struct ip6_moptions *in6p_moptions; /* (i) ICMPv6 code type filter */ - struct icmp6_filter *inp6_icmp6filt; + struct icmp6_filter *in6p_icmp6filt; /* (i) IPV6_CHECKSUM setsockopt */ - int inp6_cksum; - short inp6_hops; - } inp_depend6; + int in6p_cksum; + short in6p_hops; + }; LIST_ENTRY(inpcb) inp_portlist; /* (i/h) */ struct inpcbport *inp_phd; /* (i/h) head of this list */ #define inp_zero_size offsetof(struct inpcb, inp_gencnt) @@ -248,24 +247,17 @@ struct inpcb { #define inp_route inp_rtu.inpu_route #define inp_route6 inp_rtu.inpu_route6 }; +#endif /* _KERNEL */ + #define inp_fport inp_inc.inc_fport #define inp_lport inp_inc.inc_lport #define inp_faddr inp_inc.inc_faddr #define inp_laddr inp_inc.inc_laddr -#define inp_ip_tos inp_depend4.inp4_ip_tos -#define inp_options inp_depend4.inp4_options -#define inp_moptions inp_depend4.inp4_moptions #define in6p_faddr inp_inc.inc6_faddr #define in6p_laddr inp_inc.inc6_laddr #define in6p_zoneid inp_inc.inc6_zoneid -#define in6p_hops inp_depend6.inp6_hops /* default hop limit */ #define in6p_flowinfo inp_flow -#define in6p_options inp_depend6.inp6_options -#define in6p_outputopts inp_depend6.inp6_outputopts -#define in6p_moptions inp_depend6.inp6_moptions -#define in6p_icmp6filt inp_depend6.inp6_icmp6filt -#define in6p_cksum inp_depend6.inp6_cksum #define inp_vnet inp_pcbinfo->ipi_vnet @@ 
-279,21 +271,53 @@ struct inpcb { /* * Interface exported to userland by various protocols which use inpcbs. Hack * alert -- only define if struct xsocket is in scope. + * Fields prefixed with "xi_" are unique to this structure, and the rest + * match fields in the struct inpcb, to ease coding and porting. + * + * Legend: + * (s) - used by userland utilities in src + * (p) - used by utilities in ports + * (3) - is known to be used by third party software not in ports + * (n) - no known usage */ #ifdef _SYS_SOCKETVAR_H_ -struct xinpcb { - size_t xi_len; /* length of this structure */ - struct inpcb xi_inp; - struct xsocket xi_socket; - u_quad_t xi_alignment_hack; -}; - -struct xinpgen { - size_t xig_len; /* length of this structure */ - u_int xig_count; /* number of PCBs at this time */ - inp_gen_t xig_gen; /* generation count at this time */ - so_gen_t xig_sogen; /* socket generation count at this time */ -}; +struct xinpcb { + size_t xi_len; /* length of this structure */ + struct xsocket xi_socket; /* (s,p) */ + struct in_conninfo inp_inc; /* (s,p) */ + uint64_t inp_gencnt; /* (s,p) */ + union { + void *inp_ppcb; /* (s) netstat(1) */ + int64_t ph_ppcb; + }; + int64_t inp_spare64[4]; + uint32_t inp_flow; /* (s) */ + uint32_t inp_flowid; /* (s) */ + uint32_t inp_flowtype; /* (s) */ + int32_t inp_flags; /* (s,p) */ + int32_t inp_flags2; /* (s) */ + int32_t inp_rss_listen_bucket; /* (n) */ + int32_t in6p_cksum; /* (n) */ + int32_t inp_spare32[4]; + uint16_t in6p_hops; /* (n) */ + uint8_t inp_ip_tos; /* (n) */ + int8_t pad8; + uint8_t inp_vflag; /* (s,p) */ + uint8_t inp_ip_ttl; /* (n) */ + uint8_t inp_ip_p; /* (n) */ + uint8_t inp_ip_minttl; /* (n) */ + int8_t inp_spare8[4]; +} __aligned(8); + +struct xinpgen { + size_t xig_len; /* length of this structure */ + u_int xig_count; /* number of PCBs at this time */ + inp_gen_t xig_gen; /* generation count at this time */ + so_gen_t xig_sogen; /* socket generation count this time */ +} __aligned(8); +#ifdef _KERNEL +void 
in_pcbtoxinpcb(const struct inpcb *, struct xinpcb *); +#endif #endif /* _SYS_SOCKETVAR_H_ */ struct inpcbport { @@ -463,20 +487,12 @@ void inp_wunlock(struct inpcb *); void inp_rlock(struct inpcb *); void inp_runlock(struct inpcb *); -#ifdef INVARIANTS +#ifdef INVARIANT_SUPPORT void inp_lock_assert(struct inpcb *); void inp_unlock_assert(struct inpcb *); #else -static __inline void -inp_lock_assert(struct inpcb *inp __unused) -{ -} - -static __inline void -inp_unlock_assert(struct inpcb *inp __unused) -{ -} - +#define inp_lock_assert(inp) do {} while (0) +#define inp_unlock_assert(inp) do {} while (0) #endif void inp_apply_all(void (*func)(struct inpcb *, void *), void *arg); @@ -616,6 +632,8 @@ short inp_so_options(const struct inpcb *inp); #define INP_RSS_BUCKET_SET 0x00000080 /* IP_RSS_LISTEN_BUCKET is set */ #define INP_RECVFLOWID 0x00000100 /* populate recv datagram with flow info */ #define INP_RECVRSSBUCKETID 0x00000200 /* populate recv datagram with bucket id */ +#define INP_RATE_LIMIT_CHANGED 0x00000400 /* rate limit needs attention */ +#define INP_ORIGDSTADDR 0x00000800 /* receive IP dst address/port */ /* * Flags passed to in_pcblookup*() functions. 
@@ -736,6 +754,14 @@ int in_getsockaddr(struct socket *so, struct sockaddr **nam); struct sockaddr * in_sockaddr(in_port_t port, struct in_addr *addr); void in_pcbsosetlabel(struct socket *so); +#ifdef RATELIMIT +int in_pcbattach_txrtlmt(struct inpcb *, struct ifnet *, uint32_t, uint32_t, uint32_t); +void in_pcbdetach_txrtlmt(struct inpcb *); +int in_pcbmodify_txrtlmt(struct inpcb *, uint32_t); +int in_pcbquery_txrtlmt(struct inpcb *, uint32_t *); +void in_pcboutput_txrtlmt(struct inpcb *, struct ifnet *, struct mbuf *); +void in_pcboutput_eagain(struct inpcb *); +#endif #endif /* _KERNEL */ #endif /* !_NETINET_IN_PCB_H_ */ diff --git a/freebsd/sys/netinet/in_proto.c b/freebsd/sys/netinet/in_proto.c index 8c3efa4d..62472d7e 100644 --- a/freebsd/sys/netinet/in_proto.c +++ b/freebsd/sys/netinet/in_proto.c @@ -12,7 +12,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* @@ -92,10 +92,6 @@ __FBSDID("$FreeBSD$"); static struct pr_usrreqs nousrreqs; -#ifdef IPSEC -#include -#endif /* IPSEC */ - #ifdef SCTP #include #include @@ -154,7 +150,7 @@ struct protosw inetsw[] = { .pr_type = SOCK_SEQPACKET, .pr_domain = &inetdomain, .pr_protocol = IPPROTO_SCTP, - .pr_flags = PR_WANTRCVD, + .pr_flags = PR_WANTRCVD|PR_LASTHDR, .pr_input = sctp_input, .pr_ctlinput = sctp_ctlinput, .pr_ctloutput = sctp_ctloutput, @@ -166,7 +162,7 @@ struct protosw inetsw[] = { .pr_type = SOCK_STREAM, .pr_domain = &inetdomain, .pr_protocol = IPPROTO_SCTP, - .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD, + .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_LASTHDR, .pr_input = sctp_input, .pr_ctlinput = sctp_ctlinput, .pr_ctloutput = sctp_ctloutput, @@ -224,34 +220,6 @@ struct protosw inetsw[] = { .pr_ctloutput = rip_ctloutput, .pr_usrreqs = &rip_usrreqs }, -#ifdef IPSEC -{ - .pr_type = SOCK_RAW, - .pr_domain = &inetdomain, - .pr_protocol = IPPROTO_AH, - .pr_flags = PR_ATOMIC|PR_ADDR, - .pr_input = ah4_input, - .pr_ctlinput = ah4_ctlinput, - .pr_usrreqs = &nousrreqs -}, -{ - .pr_type = SOCK_RAW, - .pr_domain = &inetdomain, - .pr_protocol = IPPROTO_ESP, - .pr_flags = PR_ATOMIC|PR_ADDR, - .pr_input = esp4_input, - .pr_ctlinput = esp4_ctlinput, - .pr_usrreqs = &nousrreqs -}, -{ - .pr_type = SOCK_RAW, - .pr_domain = &inetdomain, - .pr_protocol = IPPROTO_IPCOMP, - .pr_flags = PR_ATOMIC|PR_ADDR, - .pr_input = ipcomp4_input, - .pr_usrreqs = &nousrreqs -}, -#endif /* IPSEC */ { .pr_type = SOCK_RAW, .pr_domain = &inetdomain, @@ -368,7 +336,7 @@ SYSCTL_NODE(_net_inet, IPPROTO_TCP, tcp, CTLFLAG_RW, 0, "TCP"); SYSCTL_NODE(_net_inet, IPPROTO_SCTP, sctp, CTLFLAG_RW, 0, "SCTP"); #endif SYSCTL_NODE(_net_inet, IPPROTO_IGMP, igmp, CTLFLAG_RW, 0, "IGMP"); -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) /* XXX no protocol # to use, pick something "reserved" */ SYSCTL_NODE(_net_inet, 253, ipsec, CTLFLAG_RW, 0, "IPSEC"); SYSCTL_NODE(_net_inet, IPPROTO_AH, ah, CTLFLAG_RW, 0, "AH"); diff 
--git a/freebsd/sys/netinet/in_systm.h b/freebsd/sys/netinet/in_systm.h index a4a56833..573ee40d 100644 --- a/freebsd/sys/netinet/in_systm.h +++ b/freebsd/sys/netinet/in_systm.h @@ -10,7 +10,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/netinet/in_var.h b/freebsd/sys/netinet/in_var.h index 08055c4f..b2a7d460 100644 --- a/freebsd/sys/netinet/in_var.h +++ b/freebsd/sys/netinet/in_var.h @@ -10,7 +10,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/netinet/ip.h b/freebsd/sys/netinet/ip.h index 98bd1e99..4d9d4888 100644 --- a/freebsd/sys/netinet/ip.h +++ b/freebsd/sys/netinet/ip.h @@ -11,7 +11,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/netinet/ip6.h b/freebsd/sys/netinet/ip6.h index ff870579..40c4973c 100644 --- a/freebsd/sys/netinet/ip6.h +++ b/freebsd/sys/netinet/ip6.h @@ -42,7 +42,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/netinet/ip_carp.c b/freebsd/sys/netinet/ip_carp.c index 5f67b6f5..da29ecc9 100644 --- a/freebsd/sys/netinet/ip_carp.c +++ b/freebsd/sys/netinet/ip_carp.c @@ -1971,7 +1971,7 @@ carp_attach(struct ifaddr *ifa, int vhid) } void -carp_detach(struct ifaddr *ifa) +carp_detach(struct ifaddr *ifa, bool keep_cif) { struct ifnet *ifp = ifa->ifa_ifp; struct carp_if *cif = ifp->if_carp; @@ -2017,12 +2017,13 @@ carp_detach(struct ifaddr *ifa) carp_hmac_prepare(sc); carp_sc_state(sc); - if (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) + if (!keep_cif && sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) carp_destroy(sc); else CARP_UNLOCK(sc); - CIF_FREE(cif); + if (!keep_cif) + CIF_FREE(cif); sx_xunlock(&carp_sx); } diff --git a/freebsd/sys/netinet/ip_carp.h b/freebsd/sys/netinet/ip_carp.h index 5b7e5064..9c6edf6d 100644 --- a/freebsd/sys/netinet/ip_carp.h +++ b/freebsd/sys/netinet/ip_carp.h @@ -138,7 +138,7 @@ struct carpreq { #ifdef _KERNEL int carp_ioctl(struct ifreq *, u_long, struct thread *); int carp_attach(struct ifaddr *, int); -void carp_detach(struct ifaddr *); +void carp_detach(struct ifaddr *, bool); void 
carp_carpdev_state(struct ifnet *); int carp_input(struct mbuf **, int *, int); int carp6_input (struct mbuf **, int *, int); @@ -154,7 +154,7 @@ int carp_forus(struct ifnet *, u_char *); /* net/if.c */ extern int (*carp_ioctl_p)(struct ifreq *, u_long, struct thread *); extern int (*carp_attach_p)(struct ifaddr *, int); -extern void (*carp_detach_p)(struct ifaddr *); +extern void (*carp_detach_p)(struct ifaddr *, bool); extern void (*carp_linkstate_p)(struct ifnet *); extern void (*carp_demote_adj_p)(int, char *); extern int (*carp_master_p)(struct ifaddr *); diff --git a/freebsd/sys/netinet/ip_divert.c b/freebsd/sys/netinet/ip_divert.c index b43ebb7c..9fb17fb2 100644 --- a/freebsd/sys/netinet/ip_divert.c +++ b/freebsd/sys/netinet/ip_divert.c @@ -12,7 +12,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* @@ -693,12 +693,8 @@ div_pcblist(SYSCTL_HANDLER_ARGS) INP_RLOCK(inp); if (inp->inp_gencnt <= gencnt) { struct xinpcb xi; - bzero(&xi, sizeof(xi)); - xi.xi_len = sizeof xi; - /* XXX should avoid extra copy */ - bcopy(inp, &xi.xi_inp, sizeof *inp); - if (inp->inp_socket) - sotoxsocket(inp->inp_socket, &xi.xi_socket); + + in_pcbtoxinpcb(inp, &xi); INP_RUNLOCK(inp); error = SYSCTL_OUT(req, &xi, sizeof xi); } else diff --git a/freebsd/sys/netinet/ip_fw.h b/freebsd/sys/netinet/ip_fw.h index d274ab27..ddee5bf1 100644 --- a/freebsd/sys/netinet/ip_fw.h +++ b/freebsd/sys/netinet/ip_fw.h @@ -281,6 +281,7 @@ enum ipfw_opcodes { /* arguments (4 byte each) */ O_EXTERNAL_ACTION, /* arg1=id of external action handler */ O_EXTERNAL_INSTANCE, /* arg1=id of eaction handler instance */ + O_EXTERNAL_DATA, /* variable length data */ O_LAST_OPCODE /* not an opcode! */ }; diff --git a/freebsd/sys/netinet/ip_icmp.c b/freebsd/sys/netinet/ip_icmp.c index b1816458..77a1c179 100644 --- a/freebsd/sys/netinet/ip_icmp.c +++ b/freebsd/sys/netinet/ip_icmp.c @@ -12,7 +12,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* @@ -382,10 +382,12 @@ icmp_input(struct mbuf **mp, int *offp, int proto) */ #ifdef ICMPPRINTFS if (icmpprintfs) { - char buf[4 * sizeof "123"]; - strcpy(buf, inet_ntoa(ip->ip_src)); + char srcbuf[INET_ADDRSTRLEN]; + char dstbuf[INET_ADDRSTRLEN]; + printf("icmp_input from %s to %s, len %d\n", - buf, inet_ntoa(ip->ip_dst), icmplen); + inet_ntoa_r(ip->ip_src, srcbuf), + inet_ntoa_r(ip->ip_dst, dstbuf), icmplen); } #endif if (icmplen < ICMP_MINLEN) { @@ -651,11 +653,12 @@ reflect: icmpdst.sin_addr = icp->icmp_gwaddr; #ifdef ICMPPRINTFS if (icmpprintfs) { - char buf[4 * sizeof "123"]; - strcpy(buf, inet_ntoa(icp->icmp_ip.ip_dst)); + char dstbuf[INET_ADDRSTRLEN]; + char gwbuf[INET_ADDRSTRLEN]; printf("redirect dst %s to %s\n", - buf, inet_ntoa(icp->icmp_gwaddr)); + inet_ntoa_r(icp->icmp_ip.ip_dst, dstbuf), + inet_ntoa_r(icp->icmp_gwaddr, gwbuf)); } #endif icmpsrc.sin_addr = icp->icmp_ip.ip_dst; @@ -903,10 +906,12 @@ icmp_send(struct mbuf *m, struct mbuf *opts) m->m_pkthdr.rcvif = (struct ifnet *)0; #ifdef ICMPPRINTFS if (icmpprintfs) { - char buf[4 * sizeof "123"]; - strcpy(buf, inet_ntoa(ip->ip_dst)); + char dstbuf[INET_ADDRSTRLEN]; + char srcbuf[INET_ADDRSTRLEN]; + printf("icmp_send dst %s src %s\n", - buf, inet_ntoa(ip->ip_src)); + inet_ntoa_r(ip->ip_dst, dstbuf), + inet_ntoa_r(ip->ip_src, srcbuf)); } #endif (void) ip_output(m, opts, NULL, 0, NULL, NULL); diff --git a/freebsd/sys/netinet/ip_icmp.h b/freebsd/sys/netinet/ip_icmp.h index 64db0064..9ffec3b0 100644 --- a/freebsd/sys/netinet/ip_icmp.h +++ b/freebsd/sys/netinet/ip_icmp.h @@ -10,7 +10,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/netinet/ip_input.c b/freebsd/sys/netinet/ip_input.c index 9061d41b..4e500e7c 100644 --- a/freebsd/sys/netinet/ip_input.c +++ b/freebsd/sys/netinet/ip_input.c @@ -12,7 +12,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * @@ -79,13 +79,10 @@ __FBSDID("$FreeBSD$"); #include #include #include -#ifdef IPSEC -#include -#include -#include -#endif /* IPSEC */ #include +#include + #include #include @@ -432,6 +429,12 @@ ip_direct_input(struct mbuf *m) ip = mtod(m, struct ip *); hlen = ip->ip_hl << 2; +#if defined(IPSEC) || defined(IPSEC_SUPPORT) + if (IPSEC_ENABLED(ipv4)) { + if (IPSEC_INPUT(ipv4, m, hlen, ip->ip_p) != 0) + return; + } +#endif /* IPSEC */ IPSTAT_INC(ips_delivered); (*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p); return; @@ -561,11 +564,11 @@ tooshort: * ip pointer. */ if (V_ipforwarding != 0 -#ifdef IPSEC - && !key_havesp(IPSEC_DIR_INBOUND) - && !key_havesp(IPSEC_DIR_OUTBOUND) +#if defined(IPSEC) || defined(IPSEC_SUPPORT) + && (!IPSEC_ENABLED(ipv4) || + IPSEC_CAPS(ipv4, m, IPSEC_CAP_OPERABLE) == 0) #endif - ) { + ) { if ((m = ip_tryforward(m)) == NULL) return; if (m->m_flags & M_FASTFWD_OURS) { @@ -574,13 +577,16 @@ tooshort: goto ours; } } -#ifdef IPSEC + +#if defined(IPSEC) || defined(IPSEC_SUPPORT) /* * Bypass packet filtering for packets previously handled by IPsec. 
*/ - if (ip_ipsec_filtertunnel(m)) - goto passin; + if (IPSEC_ENABLED(ipv4) && + IPSEC_CAPS(ipv4, m, IPSEC_CAP_BYPASS_FILTER) != 0) + goto passin; #endif + /* * Run through list of hooks for input packets. * @@ -804,14 +810,11 @@ ours: hlen = ip->ip_hl << 2; } -#ifdef IPSEC - /* - * enforce IPsec policy checking if we are seeing last header. - * note that we do not visit this with protocols with pcb layer - * code - like udp/tcp/raw ip. - */ - if (ip_ipsec_input(m, ip->ip_p) != 0) - goto bad; +#if defined(IPSEC) || defined(IPSEC_SUPPORT) + if (IPSEC_ENABLED(ipv4)) { + if (IPSEC_INPUT(ipv4, m, hlen, ip->ip_p) != 0) + return; + } #endif /* IPSEC */ /* @@ -953,24 +956,14 @@ ip_forward(struct mbuf *m, int srcrt) m_freem(m); return; } -#ifdef IPSEC - if (ip_ipsec_fwd(m) != 0) { - IPSTAT_INC(ips_cantforward); - m_freem(m); - return; - } -#endif /* IPSEC */ + if ( #ifdef IPSTEALTH - if (!V_ipstealth) { + V_ipstealth == 0 && #endif - if (ip->ip_ttl <= IPTTLDEC) { - icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, - 0, 0); - return; - } -#ifdef IPSTEALTH + ip->ip_ttl <= IPTTLDEC) { + icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 0, 0); + return; } -#endif bzero(&ro, sizeof(ro)); sin = (struct sockaddr_in *)&ro.ro_dst; @@ -989,19 +982,6 @@ ip_forward(struct mbuf *m, int srcrt) ifa_ref(&ia->ia_ifa); } else ia = NULL; -#ifndef IPSEC - /* - * 'ia' may be NULL if there is no route for this destination. - * In case of IPsec, Don't discard it just yet, but pass it to - * ip_output in case of outgoing IPsec policy. - */ - if (!srcrt && ia == NULL) { - icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0); - RO_RTFREE(&ro); - return; - } -#endif - /* * Save the IP header and at most 8 bytes of the payload, * in case we need to generate an ICMP message to the src. 
@@ -1034,15 +1014,22 @@ ip_forward(struct mbuf *m, int srcrt) mcopy->m_pkthdr.len = mcopy->m_len; m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t)); } - #ifdef IPSTEALTH - if (!V_ipstealth) { + if (V_ipstealth == 0) #endif ip->ip_ttl -= IPTTLDEC; -#ifdef IPSTEALTH +#if defined(IPSEC) || defined(IPSEC_SUPPORT) + if (IPSEC_ENABLED(ipv4)) { + if ((error = IPSEC_FORWARD(ipv4, m)) != 0) { + /* mbuf consumed by IPsec */ + m_freem(mcopy); + if (error != EINPROGRESS) + IPSTAT_INC(ips_cantforward); + return; + } + /* No IPsec processing required */ } -#endif - +#endif /* IPSEC */ /* * If forwarding packet using same interface that it came in on, * perhaps should send a redirect to sender to shortcut a hop. @@ -1120,14 +1107,6 @@ ip_forward(struct mbuf *m, int srcrt) case EMSGSIZE: type = ICMP_UNREACH; code = ICMP_UNREACH_NEEDFRAG; - -#ifdef IPSEC - /* - * If IPsec is configured for this path, - * override any possibly mtu value set by ip_output. - */ - mtu = ip_ipsec_mtu(mcopy, mtu); -#endif /* IPSEC */ /* * If the MTU was set before make sure we are below the * interface MTU. @@ -1159,30 +1138,48 @@ ip_forward(struct mbuf *m, int srcrt) icmp_error(mcopy, type, code, dest.s_addr, mtu); } +#define CHECK_SO_CT(sp, ct) \ + (((sp->so_options & SO_TIMESTAMP) && (sp->so_ts_clock == ct)) ? 
1 : 0) + void ip_savecontrol(struct inpcb *inp, struct mbuf **mp, struct ip *ip, struct mbuf *m) { - if (inp->inp_socket->so_options & (SO_BINTIME | SO_TIMESTAMP)) { + if ((inp->inp_socket->so_options & SO_BINTIME) || + CHECK_SO_CT(inp->inp_socket, SO_TS_BINTIME)) { struct bintime bt; bintime(&bt); - if (inp->inp_socket->so_options & SO_BINTIME) { - *mp = sbcreatecontrol((caddr_t)&bt, sizeof(bt), - SCM_BINTIME, SOL_SOCKET); - if (*mp) - mp = &(*mp)->m_next; - } - if (inp->inp_socket->so_options & SO_TIMESTAMP) { - struct timeval tv; + *mp = sbcreatecontrol((caddr_t)&bt, sizeof(bt), + SCM_BINTIME, SOL_SOCKET); + if (*mp) + mp = &(*mp)->m_next; + } + if (CHECK_SO_CT(inp->inp_socket, SO_TS_REALTIME_MICRO)) { + struct timeval tv; - bintime2timeval(&bt, &tv); - *mp = sbcreatecontrol((caddr_t)&tv, sizeof(tv), - SCM_TIMESTAMP, SOL_SOCKET); - if (*mp) - mp = &(*mp)->m_next; - } + microtime(&tv); + *mp = sbcreatecontrol((caddr_t)&tv, sizeof(tv), + SCM_TIMESTAMP, SOL_SOCKET); + if (*mp) + mp = &(*mp)->m_next; + } else if (CHECK_SO_CT(inp->inp_socket, SO_TS_REALTIME)) { + struct timespec ts; + + nanotime(&ts); + *mp = sbcreatecontrol((caddr_t)&ts, sizeof(ts), + SCM_REALTIME, SOL_SOCKET); + if (*mp) + mp = &(*mp)->m_next; + } else if (CHECK_SO_CT(inp->inp_socket, SO_TS_MONOTONIC)) { + struct timespec ts; + + nanouptime(&ts); + *mp = sbcreatecontrol((caddr_t)&ts, sizeof(ts), + SCM_MONOTONIC, SOL_SOCKET); + if (*mp) + mp = &(*mp)->m_next; } if (inp->inp_flags & INP_RECVDSTADDR) { *mp = sbcreatecontrol((caddr_t)&ip->ip_dst, diff --git a/freebsd/sys/netinet/ip_ipsec.c b/freebsd/sys/netinet/ip_ipsec.c deleted file mode 100644 index f3516f1c..00000000 --- a/freebsd/sys/netinet/ip_ipsec.c +++ /dev/null @@ -1,409 +0,0 @@ -#include - -/*- - * Copyright (c) 1982, 1986, 1988, 1993 - * The Regents of the University of California. All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#include -__FBSDID("$FreeBSD$"); - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#ifdef SCTP -#include -#endif - -#include - -#ifdef IPSEC -#include -#include -#include -#endif /*IPSEC*/ - -extern struct protosw inetsw[]; - -#ifdef IPSEC -#ifdef IPSEC_FILTERTUNNEL -static VNET_DEFINE(int, ip4_ipsec_filtertunnel) = 1; -#else -static VNET_DEFINE(int, ip4_ipsec_filtertunnel) = 0; -#endif -#define V_ip4_ipsec_filtertunnel VNET(ip4_ipsec_filtertunnel) - -SYSCTL_DECL(_net_inet_ipsec); -SYSCTL_VNET_INT(_net_inet_ipsec, OID_AUTO, filtertunnel, - CTLFLAG_RW, &VNET_NAME(ip4_ipsec_filtertunnel), 0, - "If set filter packets from an IPsec tunnel."); -#endif /* IPSEC */ - -/* - * Check if we have to jump over firewall processing for this packet. - * Called from ip_input(). - * 1 = jump over firewall, 0 = packet goes through firewall. - */ -int -ip_ipsec_filtertunnel(struct mbuf *m) -{ -#if defined(IPSEC) - - /* - * Bypass packet filtering for packets previously handled by IPsec. - */ - if (!V_ip4_ipsec_filtertunnel && - m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL) - return 1; -#endif - return 0; -} - -/* - * Check if this packet has an active SA and needs to be dropped instead - * of forwarded. - * Called from ip_input(). - * 1 = drop packet, 0 = forward packet. 
- */ -int -ip_ipsec_fwd(struct mbuf *m) -{ -#ifdef IPSEC - struct m_tag *mtag; - struct tdb_ident *tdbi; - struct secpolicy *sp; - int s, error; - - mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL); - s = splnet(); - if (mtag != NULL) { - tdbi = (struct tdb_ident *)(mtag + 1); - sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND); - } else { - sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND, - IP_FORWARDING, &error); - } - if (sp == NULL) { /* NB: can happen if error */ - splx(s); - /*XXX error stat???*/ - DPRINTF(("ip_input: no SP for forwarding\n")); /*XXX*/ - return 1; - } - - /* - * Check security policy against packet attributes. - */ - error = ipsec_in_reject(sp, m); - KEY_FREESP(&sp); - splx(s); - if (error) { - IPSTAT_INC(ips_cantforward); - return 1; - } -#endif /* IPSEC */ - return 0; -} - -/* - * Check if protocol type doesn't have a further header and do IPSEC - * decryption or reject right now. Protocols with further headers get - * their IPSEC treatment within the protocol specific processing. - * Called from ip_input(). - * 1 = drop packet, 0 = continue processing packet. - */ -int -ip_ipsec_input(struct mbuf *m) -{ -#ifdef IPSEC - struct ip *ip = mtod(m, struct ip *); - struct m_tag *mtag; - struct tdb_ident *tdbi; - struct secpolicy *sp; - int s, error; - /* - * enforce IPsec policy checking if we are seeing last header. - * note that we do not visit this with protocols with pcb layer - * code - like udp/tcp/raw ip. - */ - if ((inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0) { - /* - * Check if the packet has already had IPsec processing - * done. If so, then just pass it along. This tag gets - * set during AH, ESP, etc. input handling, before the - * packet is returned to the ip input queue for delivery. 
- */ - mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL); - s = splnet(); - if (mtag != NULL) { - tdbi = (struct tdb_ident *)(mtag + 1); - sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND); - } else { - sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND, - IP_FORWARDING, &error); - } - if (sp != NULL) { - /* - * Check security policy against packet attributes. - */ - error = ipsec_in_reject(sp, m); - KEY_FREESP(&sp); - } else { - /* XXX error stat??? */ - error = EINVAL; - DPRINTF(("ip_input: no SP, packet discarded\n"));/*XXX*/ - return 1; - } - splx(s); - if (error) - return 1; - } -#endif /* IPSEC */ - return 0; -} - -/* - * Compute the MTU for a forwarded packet that gets IPSEC encapsulated. - * Called from ip_forward(). - * Returns MTU suggestion for ICMP needfrag reply. - */ -int -ip_ipsec_mtu(struct mbuf *m, int mtu) -{ - /* - * If the packet is routed over IPsec tunnel, tell the - * originator the tunnel MTU. - * tunnel MTU = if MTU - sizeof(IP) - ESP/AH hdrsiz - * XXX quickhack!!! - */ - struct secpolicy *sp = NULL; - int ipsecerror; - int ipsechdr; - struct route *ro; - sp = ipsec_getpolicybyaddr(m, - IPSEC_DIR_OUTBOUND, - IP_FORWARDING, - &ipsecerror); - if (sp != NULL) { - /* count IPsec header size */ - ipsechdr = ipsec_hdrsiz(m, IPSEC_DIR_OUTBOUND, NULL); - - /* - * find the correct route for outer IPv4 - * header, compute tunnel MTU. - */ - if (sp->req != NULL && - sp->req->sav != NULL && - sp->req->sav->sah != NULL) { - ro = &sp->req->sav->sah->route_cache.sa_route; - if (ro->ro_rt && ro->ro_rt->rt_ifp) { - mtu = - ro->ro_rt->rt_rmx.rmx_mtu ? - ro->ro_rt->rt_rmx.rmx_mtu : - ro->ro_rt->rt_ifp->if_mtu; - mtu -= ipsechdr; - } - } - KEY_FREESP(&sp); - } - return mtu; -} - -/* - * - * Called from ip_output(). 
- * 1 = drop packet, 0 = continue processing packet, - * -1 = packet was reinjected and stop processing packet - */ -int -ip_ipsec_output(struct mbuf **m, struct inpcb *inp, int *flags, int *error) -{ -#ifdef IPSEC - struct secpolicy *sp = NULL; - struct ip *ip = mtod(*m, struct ip *); - struct tdb_ident *tdbi; - struct m_tag *mtag; - int s; - /* - * Check the security policy (SP) for the packet and, if - * required, do IPsec-related processing. There are two - * cases here; the first time a packet is sent through - * it will be untagged and handled by ipsec4_checkpolicy. - * If the packet is resubmitted to ip_output (e.g. after - * AH, ESP, etc. processing), there will be a tag to bypass - * the lookup and related policy checking. - */ - mtag = m_tag_find(*m, PACKET_TAG_IPSEC_PENDING_TDB, NULL); - s = splnet(); - if (mtag != NULL) { - tdbi = (struct tdb_ident *)(mtag + 1); - sp = ipsec_getpolicy(tdbi, IPSEC_DIR_OUTBOUND); - if (sp == NULL) - *error = -EINVAL; /* force silent drop */ - m_tag_delete(*m, mtag); - } else { - sp = ipsec4_checkpolicy(*m, IPSEC_DIR_OUTBOUND, *flags, - error, inp); - } - /* - * There are four return cases: - * sp != NULL apply IPsec policy - * sp == NULL, error == 0 no IPsec handling needed - * sp == NULL, error == -EINVAL discard packet w/o error - * sp == NULL, error != 0 discard packet, report error - */ - if (sp != NULL) { - /* Loop detection, check if ipsec processing already done */ - KASSERT(sp->req != NULL, ("ip_output: no ipsec request")); - for (mtag = m_tag_first(*m); mtag != NULL; - mtag = m_tag_next(*m, mtag)) { - if (mtag->m_tag_cookie != MTAG_ABI_COMPAT) - continue; - if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE && - mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED) - continue; - /* - * Check if policy has an SA associated with it. - * This can happen when an SP has yet to acquire - * an SA; e.g. on first reference. If it occurs, - * then we let ipsec4_process_packet do its thing. 
- */ - if (sp->req->sav == NULL) - break; - tdbi = (struct tdb_ident *)(mtag + 1); - if (tdbi->spi == sp->req->sav->spi && - tdbi->proto == sp->req->sav->sah->saidx.proto && - bcmp(&tdbi->dst, &sp->req->sav->sah->saidx.dst, - sizeof (union sockaddr_union)) == 0) { - /* - * No IPsec processing is needed, free - * reference to SP. - * - * NB: null pointer to avoid free at - * done: below. - */ - KEY_FREESP(&sp), sp = NULL; - splx(s); - goto done; - } - } - - /* - * Do delayed checksums now because we send before - * this is done in the normal processing path. - */ - if ((*m)->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { - in_delayed_cksum(*m); - (*m)->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; - } -#ifdef SCTP - if ((*m)->m_pkthdr.csum_flags & CSUM_SCTP) { - sctp_delayed_cksum(*m, (uint32_t)(ip->ip_hl << 2)); - (*m)->m_pkthdr.csum_flags &= ~CSUM_SCTP; - } -#endif - ip->ip_len = htons(ip->ip_len); - ip->ip_off = htons(ip->ip_off); - - /* NB: callee frees mbuf */ - *error = ipsec4_process_packet(*m, sp->req, *flags, 0); - if (*error == EJUSTRETURN) { - /* - * We had a SP with a level of 'use' and no SA. We - * will just continue to process the packet without - * IPsec processing and return without error. - */ - *error = 0; - ip->ip_len = ntohs(ip->ip_len); - ip->ip_off = ntohs(ip->ip_off); - goto done; - } - /* - * Preserve KAME behaviour: ENOENT can be returned - * when an SA acquire is in progress. Don't propagate - * this to user-level; it confuses applications. - * - * XXX this will go away when the SADB is redone. - */ - if (*error == ENOENT) - *error = 0; - splx(s); - goto reinjected; - } else { /* sp == NULL */ - splx(s); - - if (*error != 0) { - /* - * Hack: -EINVAL is used to signal that a packet - * should be silently discarded. This is typically - * because we asked key management for an SA and - * it was delayed (e.g. kicked up to IKE). - */ - if (*error == -EINVAL) - *error = 0; - goto bad; - } else { - /* No IPsec processing for this packet. 
*/ - } - } -done: - if (sp != NULL) - KEY_FREESP(&sp); - return 0; -reinjected: - if (sp != NULL) - KEY_FREESP(&sp); - return -1; -bad: - if (sp != NULL) - KEY_FREESP(&sp); - return 1; -#endif /* IPSEC */ - return 0; -} diff --git a/freebsd/sys/netinet/ip_ipsec.h b/freebsd/sys/netinet/ip_ipsec.h deleted file mode 100644 index f499b740..00000000 --- a/freebsd/sys/netinet/ip_ipsec.h +++ /dev/null @@ -1,40 +0,0 @@ -/*- - * Copyright (c) 1982, 1986, 1988, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#ifndef _NETINET_IP_IPSEC_H_ -#define _NETINET_IP_IPSEC_H_ - -int ip_ipsec_filtertunnel(struct mbuf *); -int ip_ipsec_fwd(struct mbuf *); -int ip_ipsec_input(struct mbuf *, int); -int ip_ipsec_mtu(struct mbuf *, int); -int ip_ipsec_output(struct mbuf **, struct inpcb *, int *); -#endif diff --git a/freebsd/sys/netinet/ip_mroute.c b/freebsd/sys/netinet/ip_mroute.c index f5aa0a38..85623b21 100644 --- a/freebsd/sys/netinet/ip_mroute.c +++ b/freebsd/sys/netinet/ip_mroute.c @@ -16,7 +16,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* @@ -930,8 +930,8 @@ add_vif(struct vifctl *vifcp) VIF_UNLOCK(); - CTR4(KTR_IPMF, "%s: add vif %d laddr %s thresh %x", __func__, - (int)vifcp->vifc_vifi, inet_ntoa(vifcp->vifc_lcl_addr), + CTR4(KTR_IPMF, "%s: add vif %d laddr 0x%08x thresh %x", __func__, + (int)vifcp->vifc_vifi, ntohl(vifcp->vifc_lcl_addr.s_addr), (int)vifcp->vifc_threshold); return 0; @@ -1062,8 +1062,8 @@ add_mfc(struct mfcctl2 *mfccp) /* If an entry already exists, just update the fields */ if (rt) { - CTR4(KTR_IPMF, "%s: update mfc orig %s group %lx parent %x", - __func__, inet_ntoa(mfccp->mfcc_origin), + CTR4(KTR_IPMF, "%s: update mfc orig 0x%08x group %lx parent %x", + __func__, ntohl(mfccp->mfcc_origin.s_addr), (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr), mfccp->mfcc_parent); update_mfc_params(rt, mfccp); @@ -1082,8 +1082,8 @@ add_mfc(struct mfcctl2 *mfccp) in_hosteq(rt->mfc_mcastgrp, mfccp->mfcc_mcastgrp) && !TAILQ_EMPTY(&rt->mfc_stall)) { CTR5(KTR_IPMF, - "%s: add mfc orig %s group %lx parent %x qh %p", - __func__, inet_ntoa(mfccp->mfcc_origin), + "%s: add mfc orig 0x%08x group %lx parent %x qh %p", + __func__, ntohl(mfccp->mfcc_origin.s_addr), (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr), mfccp->mfcc_parent, TAILQ_FIRST(&rt->mfc_stall)); @@ -1161,8 +1161,8 @@ del_mfc(struct mfcctl2 *mfccp) origin = mfccp->mfcc_origin; mcastgrp = mfccp->mfcc_mcastgrp; - CTR3(KTR_IPMF, "%s: delete mfc orig %s group %lx", __func__, - inet_ntoa(origin), (u_long)ntohl(mcastgrp.s_addr)); + CTR3(KTR_IPMF, "%s: delete mfc orig 0x%08x group %lx", __func__, + ntohl(origin.s_addr), (u_long)ntohl(mcastgrp.s_addr)); MFC_LOCK(); @@ -1226,8 +1226,8 @@ X_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m, int error; vifi_t vifi; - CTR3(KTR_IPMF, "ip_mforward: delete mfc orig %s group %lx ifp %p", - inet_ntoa(ip->ip_src), (u_long)ntohl(ip->ip_dst.s_addr), ifp); + CTR3(KTR_IPMF, "ip_mforward: delete mfc orig 0x%08x group %lx ifp %p", + ntohl(ip->ip_src.s_addr), (u_long)ntohl(ip->ip_dst.s_addr), ifp); if (ip->ip_hl 
< (sizeof(struct ip) + TUNNEL_LEN) >> 2 || ((u_char *)(ip + 1))[1] != IPOPT_LSRR ) { @@ -1289,8 +1289,8 @@ X_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m, MRTSTAT_INC(mrts_mfc_misses); MRTSTAT_INC(mrts_no_route); - CTR2(KTR_IPMF, "ip_mforward: no mfc for (%s,%lx)", - inet_ntoa(ip->ip_src), (u_long)ntohl(ip->ip_dst.s_addr)); + CTR2(KTR_IPMF, "ip_mforward: no mfc for (0x%08x,%lx)", + ntohl(ip->ip_src.s_addr), (u_long)ntohl(ip->ip_dst.s_addr)); /* * Allocate mbufs early so that we don't do extra work if we are @@ -2572,7 +2572,7 @@ pim_input(struct mbuf **mp, int *offp, int proto) int minlen; int datalen = ntohs(ip->ip_len) - iphlen; int ip_tos; - + *mp = NULL; /* Keep statistics */ @@ -2584,8 +2584,8 @@ pim_input(struct mbuf **mp, int *offp, int proto) */ if (datalen < PIM_MINLEN) { PIMSTAT_INC(pims_rcv_tooshort); - CTR3(KTR_IPMF, "%s: short packet (%d) from %s", - __func__, datalen, inet_ntoa(ip->ip_src)); + CTR3(KTR_IPMF, "%s: short packet (%d) from 0x%08x", + __func__, datalen, ntohl(ip->ip_src.s_addr)); m_freem(m); return (IPPROTO_DONE); } @@ -2684,8 +2684,9 @@ pim_input(struct mbuf **mp, int *offp, int proto) reghdr = (u_int32_t *)(pim + 1); encap_ip = (struct ip *)(reghdr + 1); - CTR3(KTR_IPMF, "%s: register: encap ip src %s len %d", - __func__, inet_ntoa(encap_ip->ip_src), ntohs(encap_ip->ip_len)); + CTR3(KTR_IPMF, "%s: register: encap ip src 0x%08x len %d", + __func__, ntohl(encap_ip->ip_src.s_addr), + ntohs(encap_ip->ip_len)); /* verify the version number of the inner packet */ if (encap_ip->ip_v != IPVERSION) { @@ -2698,8 +2699,8 @@ pim_input(struct mbuf **mp, int *offp, int proto) /* verify the inner packet is destined to a mcast group */ if (!IN_MULTICAST(ntohl(encap_ip->ip_dst.s_addr))) { PIMSTAT_INC(pims_rcv_badregisters); - CTR2(KTR_IPMF, "%s: bad encap ip dest %s", __func__, - inet_ntoa(encap_ip->ip_dst)); + CTR2(KTR_IPMF, "%s: bad encap ip dest 0x%08x", __func__, + ntohl(encap_ip->ip_dst.s_addr)); m_freem(m); return (IPPROTO_DONE); } 
diff --git a/freebsd/sys/netinet/ip_mroute.h b/freebsd/sys/netinet/ip_mroute.h index 65f7d83c..66bb65c3 100644 --- a/freebsd/sys/netinet/ip_mroute.h +++ b/freebsd/sys/netinet/ip_mroute.h @@ -14,7 +14,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/netinet/ip_options.c b/freebsd/sys/netinet/ip_options.c index 134479c9..72eed66b 100644 --- a/freebsd/sys/netinet/ip_options.c +++ b/freebsd/sys/netinet/ip_options.c @@ -14,7 +14,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* @@ -198,16 +198,19 @@ ip_dooptions(struct mbuf *m, int pass) #endif if (!V_ip_dosourceroute) { if (V_ipforwarding) { - char buf[16]; /* aaa.bbb.ccc.ddd\0 */ + char srcbuf[INET_ADDRSTRLEN]; + char dstbuf[INET_ADDRSTRLEN]; + /* * Acting as a router, so generate * ICMP */ nosourcerouting: - strcpy(buf, inet_ntoa(ip->ip_dst)); log(LOG_WARNING, - "attempted source route from %s to %s\n", - inet_ntoa(ip->ip_src), buf); + "attempted source route from %s " + "to %s\n", + inet_ntoa_r(ip->ip_src, srcbuf), + inet_ntoa_r(ip->ip_dst, dstbuf)); type = ICMP_UNREACH; code = ICMP_UNREACH_SRCFAIL; goto bad; diff --git a/freebsd/sys/netinet/ip_options.h b/freebsd/sys/netinet/ip_options.h index 4a6ea420..b7d2fb0a 100644 --- a/freebsd/sys/netinet/ip_options.h +++ b/freebsd/sys/netinet/ip_options.h @@ -12,7 +12,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/netinet/ip_output.c b/freebsd/sys/netinet/ip_output.c index 541acb2f..dfb3f8ad 100644 --- a/freebsd/sys/netinet/ip_output.c +++ b/freebsd/sys/netinet/ip_output.c @@ -12,7 +12,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* @@ -35,6 +35,7 @@ __FBSDID("$FreeBSD$"); #include +#include #include #include #include @@ -85,10 +86,7 @@ __FBSDID("$FreeBSD$"); #include #endif -#ifdef IPSEC -#include -#include -#endif /* IPSEC*/ +#include #include @@ -229,7 +227,7 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags, struct rtentry *rte; /* cache for ro->ro_rt */ uint32_t fibnum; int have_ia_ref; -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) int no_route_but_check_spd = 0; #endif M_ASSERTPKTHDR(m); @@ -246,8 +244,7 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags, if (ro == NULL) { ro = &iproute; bzero(ro, sizeof (*ro)); - } else - ro->ro_flags |= RT_LLE_CACHE; + } #ifdef FLOWTABLE if (ro->ro_rt == NULL) @@ -385,7 +382,7 @@ again: (rte->rt_flags & RTF_UP) == 0 || rte->rt_ifp == NULL || !RT_LINK_IS_UP(rte->rt_ifp)) { -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) /* * There is no route for this packet, but it is * possible that a matching SPD entry exists. @@ -557,15 +554,13 @@ again: } sendit: -#ifdef IPSEC - switch(ip_ipsec_output(&m, inp, &error)) { - case 1: - goto bad; - case -1: - goto done; - case 0: - default: - break; /* Continue with packet processing. */ +#if defined(IPSEC) || defined(IPSEC_SUPPORT) + if (IPSEC_ENABLED(ipv4)) { + if ((error = IPSEC_OUTPUT(ipv4, m, inp)) != 0) { + if (error == EINPROGRESS) + error = 0; + goto done; + } } /* * Check if there was a route for this packet; return error if not. 
@@ -663,8 +658,23 @@ sendit: */ m_clrprotoflags(m); IP_PROBE(send, NULL, NULL, ip, ifp, ip, NULL); +#ifdef RATELIMIT + if (inp != NULL) { + if (inp->inp_flags2 & INP_RATE_LIMIT_CHANGED) + in_pcboutput_txrtlmt(inp, ifp, m); + /* stamp send tag on mbuf */ + m->m_pkthdr.snd_tag = inp->inp_snd_tag; + } else { + m->m_pkthdr.snd_tag = NULL; + } +#endif error = (*ifp->if_output)(ifp, m, (const struct sockaddr *)gw, ro); +#ifdef RATELIMIT + /* check for route change */ + if (error == EAGAIN) + in_pcboutput_eagain(inp); +#endif goto done; } @@ -700,8 +710,23 @@ sendit: IP_PROBE(send, NULL, NULL, mtod(m, struct ip *), ifp, mtod(m, struct ip *), NULL); +#ifdef RATELIMIT + if (inp != NULL) { + if (inp->inp_flags2 & INP_RATE_LIMIT_CHANGED) + in_pcboutput_txrtlmt(inp, ifp, m); + /* stamp send tag on mbuf */ + m->m_pkthdr.snd_tag = inp->inp_snd_tag; + } else { + m->m_pkthdr.snd_tag = NULL; + } +#endif error = (*ifp->if_output)(ifp, m, (const struct sockaddr *)gw, ro); +#ifdef RATELIMIT + /* check for route change */ + if (error == EAGAIN) + in_pcboutput_eagain(inp); +#endif } else m_freem(m); } @@ -976,6 +1001,16 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt) INP_WUNLOCK(inp); error = 0; break; + case SO_MAX_PACING_RATE: +#ifdef RATELIMIT + INP_WLOCK(inp); + inp->inp_flags2 |= INP_RATE_LIMIT_CHANGED; + INP_WUNLOCK(inp); + error = 0; +#else + error = EOPNOTSUPP; +#endif + break; default: break; } @@ -1031,6 +1066,7 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt) case IP_MINTTL: case IP_RECVOPTS: case IP_RECVRETOPTS: + case IP_ORIGDSTADDR: case IP_RECVDSTADDR: case IP_RECVTTL: case IP_RECVIF: @@ -1092,6 +1128,10 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt) OPTSET(INP_RECVDSTADDR); break; + case IP_ORIGDSTADDR: + OPTSET2(INP_ORIGDSTADDR, optval); + break; + case IP_RECVTTL: OPTSET(INP_RECVTTL); break; @@ -1191,23 +1231,13 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt) INP_WUNLOCK(inp); break; -#ifdef IPSEC +#if defined(IPSEC) || 
defined(IPSEC_SUPPORT) case IP_IPSEC_POLICY: - { - caddr_t req; - struct mbuf *m; - - if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */ + if (IPSEC_ENABLED(ipv4)) { + error = IPSEC_PCBCTL(ipv4, inp, sopt); break; - if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */ - break; - req = mtod(m, caddr_t); - error = ipsec_set_policy(inp, sopt->sopt_name, req, - m->m_len, (sopt->sopt_td != NULL) ? - sopt->sopt_td->td_ucred : NULL); - m_freem(m); - break; - } + } + /* FALLTHROUGH */ #endif /* IPSEC */ default: @@ -1234,6 +1264,7 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt) case IP_MINTTL: case IP_RECVOPTS: case IP_RECVRETOPTS: + case IP_ORIGDSTADDR: case IP_RECVDSTADDR: case IP_RECVTTL: case IP_RECVIF: @@ -1279,6 +1310,10 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt) optval = OPTBIT(INP_RECVDSTADDR); break; + case IP_ORIGDSTADDR: + optval = OPTBIT2(INP_ORIGDSTADDR); + break; + case IP_RECVTTL: optval = OPTBIT(INP_RECVTTL); break; @@ -1350,24 +1385,13 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt) error = inp_getmoptions(inp, sopt); break; -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) case IP_IPSEC_POLICY: - { - struct mbuf *m = NULL; - caddr_t req = NULL; - size_t len = 0; - - if (m != NULL) { - req = mtod(m, caddr_t); - len = m->m_len; + if (IPSEC_ENABLED(ipv4)) { + error = IPSEC_PCBCTL(ipv4, inp, sopt); + break; } - error = ipsec_get_policy(sotoinpcb(so), req, len, &m); - if (error == 0) - error = soopt_mcopyout(sopt, m); /* XXX */ - if (error == 0) - m_freem(m); - break; - } + /* FALLTHROUGH */ #endif /* IPSEC */ default: diff --git a/freebsd/sys/netinet/ip_reass.c b/freebsd/sys/netinet/ip_reass.c index aae24b9d..4cfabc8f 100644 --- a/freebsd/sys/netinet/ip_reass.c +++ b/freebsd/sys/netinet/ip_reass.c @@ -14,7 +14,7 @@ * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/netinet/ip_var.h b/freebsd/sys/netinet/ip_var.h index 847704fd..f7e58d18 100644 --- a/freebsd/sys/netinet/ip_var.h +++ b/freebsd/sys/netinet/ip_var.h @@ -10,7 +10,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* diff --git a/freebsd/sys/netinet/libalias/alias_local.h b/freebsd/sys/netinet/libalias/alias_local.h index 3010be84..b8632359 100644 --- a/freebsd/sys/netinet/libalias/alias_local.h +++ b/freebsd/sys/netinet/libalias/alias_local.h @@ -70,6 +70,12 @@ #define GET_ALIAS_PORT -1 #define GET_ALIAS_ID GET_ALIAS_PORT +#ifdef _KERNEL +#define INET_NTOA_BUF(buf) (buf) +#else +#define INET_NTOA_BUF(buf) (buf), sizeof(buf) +#endif + struct proxy_entry; struct libalias { diff --git a/freebsd/sys/netinet/libalias/alias_nbt.c b/freebsd/sys/netinet/libalias/alias_nbt.c index c10f9b48..d3fbb98d 100644 --- a/freebsd/sys/netinet/libalias/alias_nbt.c +++ b/freebsd/sys/netinet/libalias/alias_nbt.c @@ -346,6 +346,9 @@ AliasHandleUdpNbt( NbtDataHeader *ndh; u_char *p = NULL; char *pmax; +#ifdef LIBALIAS_DEBUG + char addrbuf[INET_ADDRSTRLEN]; +#endif (void)la; (void)lnk; @@ -381,7 +384,8 @@ AliasHandleUdpNbt( if (p == NULL || (char *)p > pmax) p = NULL; #ifdef LIBALIAS_DEBUG - printf("%s:%d-->", inet_ntoa(ndh->source_ip), ntohs(ndh->source_port)); + printf("%s:%d-->", inet_ntoa_r(ndh->source_ip, INET_NTOA_BUF(addrbuf)), + ntohs(ndh->source_port)); #endif /* Doing an IP address and Port number Translation */ if (uh->uh_sum != 0) { @@ -401,7 +405,8 @@ AliasHandleUdpNbt( ndh->source_ip = *alias_address; ndh->source_port = alias_port; #ifdef LIBALIAS_DEBUG - printf("%s:%d\n", inet_ntoa(ndh->source_ip), ntohs(ndh->source_port)); + printf("%s:%d\n", inet_ntoa_r(ndh->source_ip, INET_NTOA_BUF(addrbuf)), + ntohs(ndh->source_port)); fflush(stdout); #endif return ((p == NULL) ? 
-1 : 0); @@ -482,6 +487,10 @@ AliasHandleResourceNB( { NBTNsRNB *nb; u_short bcount; +#ifdef LIBALIAS_DEBUG + char oldbuf[INET_ADDRSTRLEN]; + char newbuf[INET_ADDRSTRLEN]; +#endif if (q == NULL || (char *)(q + 1) > pmax) return (NULL); @@ -493,8 +502,10 @@ AliasHandleResourceNB( /* Processing all in_addr array */ #ifdef LIBALIAS_DEBUG - printf("NB rec[%s", inet_ntoa(nbtarg->oldaddr)); - printf("->%s, %dbytes] ", inet_ntoa(nbtarg->newaddr), bcount); + printf("NB rec[%s->%s, %dbytes] ", + inet_ntoa_r(nbtarg->oldaddr, INET_NTOA_BUF(oldbuf)), + inet_ntoa_r(nbtarg->newaddr, INET_NTOA_BUF(newbuf)), + bcount); #endif while (nb != NULL && bcount != 0) { if ((char *)(nb + 1) > pmax) { @@ -502,7 +513,7 @@ AliasHandleResourceNB( break; } #ifdef LIBALIAS_DEBUG - printf("<%s>", inet_ntoa(nb->addr)); + printf("<%s>", inet_ntoa_r(nb->addr, INET_NTOA_BUF(newbuf))); #endif if (!bcmp(&nbtarg->oldaddr, &nb->addr, sizeof(struct in_addr))) { if (*nbtarg->uh_sum != 0) { @@ -549,6 +560,10 @@ AliasHandleResourceA( { NBTNsResourceA *a; u_short bcount; +#ifdef LIBALIAS_DEBUG + char oldbuf[INET_ADDRSTRLEN]; + char newbuf[INET_ADDRSTRLEN]; +#endif if (q == NULL || (char *)(q + 1) > pmax) return (NULL); @@ -561,14 +576,15 @@ AliasHandleResourceA( /* Processing all in_addr array */ #ifdef LIBALIAS_DEBUG - printf("Arec [%s", inet_ntoa(nbtarg->oldaddr)); - printf("->%s]", inet_ntoa(nbtarg->newaddr)); + printf("Arec [%s->%s]", + inet_ntoa_r(nbtarg->oldaddr, INET_NTOA_BUF(oldbuf)), + inet_ntoa_r(nbtarg->newaddr, INET_NTOA_BUF(newbuf))); #endif while (bcount != 0) { if (a == NULL || (char *)(a + 1) > pmax) return (NULL); #ifdef LIBALIAS_DEBUG - printf("..%s", inet_ntoa(a->addr)); + printf("..%s", inet_ntoa_r(a->addr, INET_NTOA_BUF(newbuf))); #endif if (!bcmp(&nbtarg->oldaddr, &a->addr, sizeof(struct in_addr))) { if (*nbtarg->uh_sum != 0) { diff --git a/freebsd/sys/netinet/libalias/alias_proxy.c b/freebsd/sys/netinet/libalias/alias_proxy.c index e45abad4..fdd46b1d 100644 --- 
a/freebsd/sys/netinet/libalias/alias_proxy.c +++ b/freebsd/sys/netinet/libalias/alias_proxy.c @@ -296,6 +296,7 @@ ProxyEncodeTcpStream(struct alias_link *lnk, int slen; char buffer[40]; struct tcphdr *tc; + char addrbuf[INET_ADDRSTRLEN]; /* Compute pointer to tcp header */ tc = (struct tcphdr *)ip_next(pip); @@ -307,7 +308,8 @@ ProxyEncodeTcpStream(struct alias_link *lnk, /* Translate destination address and port to string form */ snprintf(buffer, sizeof(buffer) - 2, "[DEST %s %d]", - inet_ntoa(GetProxyAddress(lnk)), (u_int) ntohs(GetProxyPort(lnk))); + inet_ntoa_r(GetProxyAddress(lnk), INET_NTOA_BUF(addrbuf)), + (u_int) ntohs(GetProxyPort(lnk))); /* Pad string out to a multiple of two in length */ slen = strlen(buffer); @@ -720,7 +722,8 @@ LibAliasProxyRule(struct libalias *la, const char *cmd) err = RuleNumberDelete(la, rule_to_delete); if (err) ret = -1; - ret = 0; + else + ret = 0; goto getout; } diff --git a/freebsd/sys/netinet/libalias/alias_sctp.c b/freebsd/sys/netinet/libalias/alias_sctp.c index 6158149a..c4048410 100644 --- a/freebsd/sys/netinet/libalias/alias_sctp.c +++ b/freebsd/sys/netinet/libalias/alias_sctp.c @@ -906,6 +906,7 @@ TxAbortErrorM(struct libalias *la, struct sctp_nat_msg *sm, struct sctp_nat_asso int ip_size = sizeof(struct ip) + sctp_size; int include_error_cause = 1; char tmp_ip[ip_size]; + char addrbuf[INET_ADDRSTRLEN]; if (ntohs(sm->ip_hdr->ip_len) < ip_size) { /* short packet, cannot send error cause */ include_error_cause = 0; @@ -986,7 +987,8 @@ TxAbortErrorM(struct libalias *la, struct sctp_nat_msg *sm, struct sctp_nat_asso ((sndrply == SN_SEND_ABORT) ? "Sending" : "Replying"), ((sndrply & SN_TX_ERROR) ? "ErrorM" : "AbortM"), (include_error_cause ? 
ntohs(error_cause->code) : 0), - inet_ntoa(ip->ip_dst),ntohs(sctp_hdr->dest_port), + inet_ntoa_r(ip->ip_dst, INET_NTOA_BUF(addrbuf)), + ntohs(sctp_hdr->dest_port), ntohl(sctp_hdr->v_tag), ntohl(sctp_hdr->checksum))); } @@ -2576,6 +2578,8 @@ static void logsctpassoc(struct sctp_nat_assoc *assoc, char* s) { struct sctp_GlobalAddress *G_Addr = NULL; char *sp; + char addrbuf[INET_ADDRSTRLEN]; + switch(assoc->state) { case SN_ID: sp = "ID "; @@ -2600,12 +2604,14 @@ static void logsctpassoc(struct sctp_nat_assoc *assoc, char* s) break; } SctpAliasLog("%sAssoc: %s exp=%u la=%s lv=%u lp=%u gv=%u gp=%u tbl=%d\n", - s, sp, assoc->exp, inet_ntoa(assoc->l_addr), ntohl(assoc->l_vtag), - ntohs(assoc->l_port), ntohl(assoc->g_vtag), ntohs(assoc->g_port), + s, sp, assoc->exp, inet_ntoa_r(assoc->l_addr, addrbuf), + ntohl(assoc->l_vtag), ntohs(assoc->l_port), + ntohl(assoc->g_vtag), ntohs(assoc->g_port), assoc->TableRegister); /* list global addresses */ LIST_FOREACH(G_Addr, &(assoc->Gaddr), list_Gaddr) { - SctpAliasLog("\t\tga=%s\n",inet_ntoa(G_Addr->g_addr)); + SctpAliasLog("\t\tga=%s\n", + inet_ntoa_r(G_Addr->g_addr, addrbuf)); } } diff --git a/freebsd/sys/netinet/raw_ip.c b/freebsd/sys/netinet/raw_ip.c index c379d681..b9fae844 100644 --- a/freebsd/sys/netinet/raw_ip.c +++ b/freebsd/sys/netinet/raw_ip.c @@ -13,7 +13,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* @@ -75,9 +75,7 @@ __FBSDID("$FreeBSD$"); #include #include -#ifdef IPSEC -#include -#endif /*IPSEC*/ +#include #include #include @@ -238,10 +236,11 @@ rip_append(struct inpcb *last, struct ip *ip, struct mbuf *n, INP_LOCK_ASSERT(last); -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) /* check AH/ESP integrity. */ - if (ipsec4_in_reject(n, last)) { - policyfail = 1; + if (IPSEC_ENABLED(ipv4)) { + if (IPSEC_CHECK_POLICY(ipv4, n, last) != 0) + policyfail = 1; } #endif /* IPSEC */ #ifdef MAC @@ -510,7 +509,7 @@ rip_output(struct mbuf *m, struct socket *so, ...) * and don't allow packet length sizes that will crash. */ if (((ip->ip_hl != (sizeof (*ip) >> 2)) && inp->inp_options) - || (ntohs(ip->ip_len) > m->m_pkthdr.len) + || (ntohs(ip->ip_len) != m->m_pkthdr.len) || (ntohs(ip->ip_len) < (ip->ip_hl << 2))) { INP_RUNLOCK(inp); m_freem(m); @@ -1080,12 +1079,7 @@ rip_pcblist(SYSCTL_HANDLER_ARGS) if (inp->inp_gencnt <= gencnt) { struct xinpcb xi; - bzero(&xi, sizeof(xi)); - xi.xi_len = sizeof xi; - /* XXX should avoid extra copy */ - bcopy(inp, &xi.xi_inp, sizeof *inp); - if (inp->inp_socket) - sotoxsocket(inp->inp_socket, &xi.xi_socket); + in_pcbtoxinpcb(inp, &xi); INP_RUNLOCK(inp); error = SYSCTL_OUT(req, &xi, sizeof xi); } else diff --git a/freebsd/sys/netinet/sctp_input.c b/freebsd/sys/netinet/sctp_input.c index 7e84ebd1..d363642a 100644 --- a/freebsd/sys/netinet/sctp_input.c +++ b/freebsd/sys/netinet/sctp_input.c @@ -5792,34 +5792,6 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int lengt } else if (stcb == NULL) { inp_decr = inp; } -#ifdef IPSEC - /*- - * I very much doubt any of the IPSEC stuff will work but I have no - * idea, so I will leave it in place. 
- */ - if (inp != NULL) { - switch (dst->sa_family) { -#ifdef INET - case AF_INET: - if (ipsec4_in_reject(m, &inp->ip_inp.inp)) { - SCTP_STAT_INCR(sctps_hdrops); - goto out; - } - break; -#endif -#ifdef INET6 - case AF_INET6: - if (ipsec6_in_reject(m, &inp->ip_inp.inp)) { - SCTP_STAT_INCR(sctps_hdrops); - goto out; - } - break; -#endif - default: - break; - } - } -#endif SCTPDBG(SCTP_DEBUG_INPUT1, "Ok, Common input processing called, m:%p iphlen:%d offset:%d length:%d stcb:%p\n", (void *)m, iphlen, offset, length, (void *)stcb); if (stcb) { diff --git a/freebsd/sys/netinet/sctp_os_bsd.h b/freebsd/sys/netinet/sctp_os_bsd.h index 438973cb..f603cec6 100644 --- a/freebsd/sys/netinet/sctp_os_bsd.h +++ b/freebsd/sys/netinet/sctp_os_bsd.h @@ -38,7 +38,6 @@ __FBSDID("$FreeBSD$"); /* * includes */ -#include #include #include #include @@ -82,16 +81,8 @@ __FBSDID("$FreeBSD$"); #include #include -#ifdef IPSEC -#include -#include -#endif /* IPSEC */ - #ifdef INET6 #include -#ifdef IPSEC -#include -#endif #include #include #include @@ -100,7 +91,6 @@ __FBSDID("$FreeBSD$"); #include #endif /* INET6 */ - #include #include diff --git a/freebsd/sys/netinet/sctp_output.c b/freebsd/sys/netinet/sctp_output.c index a27f2a3e..2e6eedaf 100644 --- a/freebsd/sys/netinet/sctp_output.c +++ b/freebsd/sys/netinet/sctp_output.c @@ -7082,11 +7082,9 @@ sctp_clean_up_ctl(struct sctp_tcb *stcb, struct sctp_association *asoc, int so_l } } - -static int -sctp_can_we_split_this(struct sctp_tcb *stcb, - uint32_t length, - uint32_t goal_mtu, uint32_t frag_point, int eeor_on) +static uint32_t +sctp_can_we_split_this(struct sctp_tcb *stcb, uint32_t length, + uint32_t space_left, uint32_t frag_point, int eeor_on) { /* * Make a decision on if I should split a msg into multiple parts. @@ -7098,7 +7096,7 @@ sctp_can_we_split_this(struct sctp_tcb *stcb, * entire thing, since it might be all the guy is putting in * the hopper. 
*/ - if (goal_mtu >= length) { + if (space_left >= length) { /*- * If we have data outstanding, * we get another chance when the sack @@ -7115,7 +7113,7 @@ sctp_can_we_split_this(struct sctp_tcb *stcb, } else { /* You can fill the rest */ - return (goal_mtu); + return (space_left); } } /*- @@ -7126,28 +7124,27 @@ sctp_can_we_split_this(struct sctp_tcb *stcb, if (SCTP_SB_LIMIT_SND(stcb->sctp_socket) < frag_point) { return (length); } - if ((length <= goal_mtu) || - ((length - goal_mtu) < SCTP_BASE_SYSCTL(sctp_min_residual))) { + if ((length <= space_left) || + ((length - space_left) < SCTP_BASE_SYSCTL(sctp_min_residual))) { /* Sub-optimial residual don't split in non-eeor mode. */ return (0); } /* - * If we reach here length is larger than the goal_mtu. Do we wish + * If we reach here length is larger than the space_left. Do we wish * to split it for the sake of packet putting together? */ - if (goal_mtu >= min(SCTP_BASE_SYSCTL(sctp_min_split_point), frag_point)) { + if (space_left >= min(SCTP_BASE_SYSCTL(sctp_min_split_point), frag_point)) { /* Its ok to split it */ - return (min(goal_mtu, frag_point)); + return (min(space_left, frag_point)); } /* Nope, can't split */ return (0); - } static uint32_t sctp_move_to_outqueue(struct sctp_tcb *stcb, struct sctp_stream_out *strq, - uint32_t goal_mtu, + uint32_t space_left, uint32_t frag_point, int *giveup, int eeor_mode, @@ -7308,7 +7305,7 @@ re_look: sp->some_taken = 1; } } else { - to_move = sctp_can_we_split_this(stcb, length, goal_mtu, frag_point, eeor_mode); + to_move = sctp_can_we_split_this(stcb, length, space_left, frag_point, eeor_mode); if (to_move) { /*- * We use a snapshot of length in case it @@ -7703,56 +7700,66 @@ sctp_fill_outqueue(struct sctp_tcb *stcb, { struct sctp_association *asoc; struct sctp_stream_out *strq; - int goal_mtu, moved_how_much, total_moved = 0, bail = 0; - int giveup; + uint32_t space_left, moved, total_moved; + int bail, giveup; SCTP_TCB_LOCK_ASSERT(stcb); asoc = &stcb->asoc; + 
total_moved = 0; switch (net->ro._l_addr.sa.sa_family) { #ifdef INET case AF_INET: - goal_mtu = net->mtu - SCTP_MIN_V4_OVERHEAD; + space_left = net->mtu - SCTP_MIN_V4_OVERHEAD; break; #endif #ifdef INET6 case AF_INET6: - goal_mtu = net->mtu - SCTP_MIN_OVERHEAD; + space_left = net->mtu - SCTP_MIN_OVERHEAD; break; #endif default: /* TSNH */ - goal_mtu = net->mtu; + space_left = net->mtu; break; } /* Need an allowance for the data chunk header too */ if (stcb->asoc.idata_supported == 0) { - goal_mtu -= sizeof(struct sctp_data_chunk); + space_left -= sizeof(struct sctp_data_chunk); } else { - goal_mtu -= sizeof(struct sctp_idata_chunk); + space_left -= sizeof(struct sctp_idata_chunk); } /* must make even word boundary */ - goal_mtu &= 0xfffffffc; + space_left &= 0xfffffffc; strq = stcb->asoc.ss_functions.sctp_ss_select_stream(stcb, net, asoc); - while ((goal_mtu > 0) && strq) { - giveup = 0; - bail = 0; - moved_how_much = sctp_move_to_outqueue(stcb, strq, goal_mtu, frag_point, + giveup = 0; + bail = 0; + while ((space_left > 0) && (strq != NULL)) { + moved = sctp_move_to_outqueue(stcb, strq, space_left, frag_point, &giveup, eeor_mode, &bail, so_locked); - stcb->asoc.ss_functions.sctp_ss_scheduled(stcb, net, asoc, strq, moved_how_much); - - if ((giveup) || bail) { + stcb->asoc.ss_functions.sctp_ss_scheduled(stcb, net, asoc, strq, moved); + if ((giveup != 0) || (bail != 0)) { break; } strq = stcb->asoc.ss_functions.sctp_ss_select_stream(stcb, net, asoc); - if (strq == NULL) { - break; + total_moved += moved; + space_left -= moved; + if (stcb->asoc.idata_supported == 0) { + if (space_left >= sizeof(struct sctp_data_chunk)) { + space_left -= sizeof(struct sctp_data_chunk); + } else { + space_left = 0; + } + } else { + if (space_left >= sizeof(struct sctp_idata_chunk)) { + space_left -= sizeof(struct sctp_idata_chunk); + } else { + space_left = 0; + } } - total_moved += moved_how_much; - goal_mtu -= (moved_how_much + sizeof(struct sctp_data_chunk)); - goal_mtu &= 
0xfffffffc; + space_left &= 0xfffffffc; } - if (bail) + if (bail != 0) *quit_now = 1; stcb->asoc.ss_functions.sctp_ss_packet_done(stcb, net, asoc); diff --git a/freebsd/sys/netinet/sctp_pcb.c b/freebsd/sys/netinet/sctp_pcb.c index 4cd7bf0b..3608fd5e 100644 --- a/freebsd/sys/netinet/sctp_pcb.c +++ b/freebsd/sys/netinet/sctp_pcb.c @@ -2471,15 +2471,6 @@ sctp_inpcb_alloc(struct socket *so, uint32_t vrf_id) SCTP_INP_INFO_WUNLOCK(); return (ENOBUFS); } -#ifdef IPSEC - error = ipsec_init_policy(so, &inp->ip_inp.inp.inp_sp); - if (error != 0) { - crfree(inp->ip_inp.inp.inp_cred); - SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_ep), inp); - SCTP_INP_INFO_WUNLOCK(); - return error; - } -#endif /* IPSEC */ SCTP_INCR_EP_COUNT(); inp->ip_inp.inp.inp_ip_ttl = MODULE_GLOBAL(ip_defttl); SCTP_INP_INFO_WUNLOCK(); @@ -2506,9 +2497,6 @@ sctp_inpcb_alloc(struct socket *so, uint32_t vrf_id) SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EOPNOTSUPP); so->so_pcb = NULL; crfree(inp->ip_inp.inp.inp_cred); -#ifdef IPSEC - ipsec_delete_pcbpolicy(&inp->ip_inp.inp); -#endif SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_ep), inp); return (EOPNOTSUPP); } @@ -2529,9 +2517,6 @@ sctp_inpcb_alloc(struct socket *so, uint32_t vrf_id) SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, ENOBUFS); so->so_pcb = NULL; crfree(inp->ip_inp.inp.inp_cred); -#ifdef IPSEC - ipsec_delete_pcbpolicy(&inp->ip_inp.inp); -#endif SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_ep), inp); return (ENOBUFS); } @@ -3645,9 +3630,6 @@ sctp_inpcb_free(struct sctp_inpcb *inp, int immediate, int from) * macro here since le_next will get freed as part of the * sctp_free_assoc() call. 
*/ -#ifdef IPSEC - ipsec_delete_pcbpolicy(ip_pcb); -#endif if (ip_pcb->inp_options) { (void)sctp_m_free(ip_pcb->inp_options); ip_pcb->inp_options = 0; diff --git a/freebsd/sys/netinet/sctp_timer.c b/freebsd/sys/netinet/sctp_timer.c index 6b4b0be0..6ce9fc30 100644 --- a/freebsd/sys/netinet/sctp_timer.c +++ b/freebsd/sys/netinet/sctp_timer.c @@ -721,13 +721,9 @@ start_again: if (num_mk) { SCTPDBG(SCTP_DEBUG_TIMER1, "LAST TSN marked was %x\n", tsnlast); - SCTPDBG(SCTP_DEBUG_TIMER1, "Num marked for retransmission was %d peer-rwd:%ld\n", - num_mk, (u_long)stcb->asoc.peers_rwnd); - SCTPDBG(SCTP_DEBUG_TIMER1, "LAST TSN marked was %x\n", - tsnlast); - SCTPDBG(SCTP_DEBUG_TIMER1, "Num marked for retransmission was %d peer-rwd:%d\n", + SCTPDBG(SCTP_DEBUG_TIMER1, "Num marked for retransmission was %d peer-rwd:%u\n", num_mk, - (int)stcb->asoc.peers_rwnd); + stcb->asoc.peers_rwnd); } #endif *num_marked = num_mk; diff --git a/freebsd/sys/netinet/sctp_usrreq.c b/freebsd/sys/netinet/sctp_usrreq.c index ebaa58d4..550926f3 100644 --- a/freebsd/sys/netinet/sctp_usrreq.c +++ b/freebsd/sys/netinet/sctp_usrreq.c @@ -110,7 +110,7 @@ sctp_pathmtu_adjustment(struct sctp_tcb *stcb, uint16_t nxtsz) /* Adjust that too */ stcb->asoc.smallest_mtu = nxtsz; /* now off to subtract IP_DF flag if needed */ - overhead = IP_HDR_SIZE; + overhead = IP_HDR_SIZE + sizeof(struct sctphdr); if (sctp_auth_is_required_chunk(SCTP_DATA, stcb->asoc.peer_auth_chunks)) { overhead += sctp_get_auth_chunk_len(stcb->asoc.peer_hmac_id); } diff --git a/freebsd/sys/netinet/tcp.h b/freebsd/sys/netinet/tcp.h index 47038104..62a177de 100644 --- a/freebsd/sys/netinet/tcp.h +++ b/freebsd/sys/netinet/tcp.h @@ -10,7 +10,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/netinet/tcp_debug.c b/freebsd/sys/netinet/tcp_debug.c index 707e7c5d..3a9b6d6a 100644 --- a/freebsd/sys/netinet/tcp_debug.c +++ b/freebsd/sys/netinet/tcp_debug.c @@ -13,7 +13,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/netinet/tcp_debug.h b/freebsd/sys/netinet/tcp_debug.h index 511a4ecd..aa26c292 100644 --- a/freebsd/sys/netinet/tcp_debug.h +++ b/freebsd/sys/netinet/tcp_debug.h @@ -10,7 +10,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/netinet/tcp_fsm.h b/freebsd/sys/netinet/tcp_fsm.h index 5423e1f1..ddb52084 100644 --- a/freebsd/sys/netinet/tcp_fsm.h +++ b/freebsd/sys/netinet/tcp_fsm.h @@ -11,7 +11,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. 
Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/netinet/tcp_hostcache.c b/freebsd/sys/netinet/tcp_hostcache.c index d26688e5..ef04cf98 100644 --- a/freebsd/sys/netinet/tcp_hostcache.c +++ b/freebsd/sys/netinet/tcp_hostcache.c @@ -625,6 +625,7 @@ sysctl_tcp_hc_list(SYSCTL_HANDLER_ARGS) struct sbuf sb; int i, error; struct hc_metrics *hc_entry; + char ip4buf[INET_ADDRSTRLEN]; #ifdef INET6 char ip6buf[INET6_ADDRSTRLEN]; #endif @@ -647,7 +648,8 @@ sysctl_tcp_hc_list(SYSCTL_HANDLER_ARGS) sbuf_printf(&sb, "%-15s %5u %8u %6lums %6lums %8u %8u %8u %4lu " "%4lu %4i\n", - hc_entry->ip4.s_addr ? inet_ntoa(hc_entry->ip4) : + hc_entry->ip4.s_addr ? + inet_ntoa_r(hc_entry->ip4, ip4buf) : #ifdef INET6 ip6_sprintf(ip6buf, &hc_entry->ip6), #else diff --git a/freebsd/sys/netinet/tcp_input.c b/freebsd/sys/netinet/tcp_input.c index bc77edbc..d23e1d31 100644 --- a/freebsd/sys/netinet/tcp_input.c +++ b/freebsd/sys/netinet/tcp_input.c @@ -30,7 +30,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* @@ -122,10 +122,7 @@ __FBSDID("$FreeBSD$"); #include #endif -#ifdef IPSEC -#include -#include -#endif /*IPSEC*/ +#include #include @@ -489,20 +486,6 @@ cc_post_recovery(struct tcpcb *tp, struct tcphdr *th) tp->t_bytes_acked = 0; } -#ifdef TCP_SIGNATURE -static inline int -tcp_signature_verify_input(struct mbuf *m, int off0, int tlen, int optlen, - struct tcpopt *to, struct tcphdr *th, u_int tcpbflag) -{ - int ret; - - tcp_fields_to_net(th); - ret = tcp_signature_verify(m, off0, tlen, optlen, to, th, tcpbflag); - tcp_fields_to_host(th); - return (ret); -} -#endif - /* * Indicate whether this ack should be delayed. We can delay the ack if * following conditions are met: @@ -613,9 +596,6 @@ tcp_input(struct mbuf **mp, int *offp, int proto) int drop_hdrlen; int thflags; int rstreason = 0; /* For badport_bandlim accounting purposes */ -#ifdef TCP_SIGNATURE - uint8_t sig_checked = 0; -#endif uint8_t iptos; struct m_tag *fwd_tag = NULL; #ifdef INET6 @@ -946,15 +926,22 @@ findpcb: inp->inp_flowid = m->m_pkthdr.flowid; inp->inp_flowtype = M_HASHTYPE_GET(m); } -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) #ifdef INET6 - if (isipv6 && ipsec6_in_reject(m, inp)) { + if (isipv6 && IPSEC_ENABLED(ipv6) && + IPSEC_CHECK_POLICY(ipv6, m, inp) != 0) { goto dropunlock; - } else + } +#ifdef INET + else +#endif #endif /* INET6 */ - if (ipsec4_in_reject(m, inp) != 0) { +#ifdef INET + if (IPSEC_ENABLED(ipv4) && + IPSEC_CHECK_POLICY(ipv4, m, inp) != 0) { goto dropunlock; } +#endif /* INET */ #endif /* IPSEC */ /* @@ -1137,7 +1124,16 @@ relocked: * NB: syncache_expand() doesn't unlock * inp and tcpinfo locks. */ - if (!syncache_expand(&inc, &to, th, &so, m)) { + rstreason = syncache_expand(&inc, &to, th, &so, m); + if (rstreason < 0) { + /* + * A failing TCP MD5 signature comparison + * must result in the segment being dropped + * and must not produce any response back + * to the sender. 
+ */ + goto dropunlock; + } else if (rstreason == 0) { /* * No syncache entry or ACK was not * for our SYN/ACK. Send a RST. @@ -1189,26 +1185,6 @@ tfo_socket_result: tp = intotcpcb(inp); KASSERT(tp->t_state == TCPS_SYN_RECEIVED, ("%s: ", __func__)); -#ifdef TCP_SIGNATURE - if (sig_checked == 0) { - tcp_dooptions(&to, optp, optlen, - (thflags & TH_SYN) ? TO_SYN : 0); - if (!tcp_signature_verify_input(m, off0, tlen, - optlen, &to, th, tp->t_flags)) { - - /* - * In SYN_SENT state if it receives an - * RST, it is allowed for further - * processing. - */ - if ((thflags & TH_RST) == 0 || - (tp->t_state == TCPS_SYN_SENT) == 0) - goto dropunlock; - } - sig_checked = 1; - } -#endif - /* * Process the segment and the data it * contains. tcp_do_segment() consumes @@ -1438,26 +1414,18 @@ tfo_socket_result: */ goto dropunlock; } - -#ifdef TCP_SIGNATURE - if (sig_checked == 0) { - tcp_dooptions(&to, optp, optlen, - (thflags & TH_SYN) ? TO_SYN : 0); - if (!tcp_signature_verify_input(m, off0, tlen, optlen, &to, - th, tp->t_flags)) { - - /* - * In SYN_SENT state if it receives an RST, it is - * allowed for further processing. - */ - if ((thflags & TH_RST) == 0 || - (tp->t_state == TCPS_SYN_SENT) == 0) - goto dropunlock; +#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) + if (tp->t_flags & TF_SIGNATURE) { + tcp_dooptions(&to, optp, optlen, thflags); + if ((to.to_flags & TOF_SIGNATURE) == 0) { + TCPSTAT_INC(tcps_sig_err_nosigopt); + goto dropunlock; } - sig_checked = 1; + if (!TCPMD5_ENABLED() || + TCPMD5_INPUT(m, th, to.to_signature) != 0) + goto dropunlock; } #endif - TCP_PROBE5(receive, NULL, tp, m, tp, th); /* @@ -1634,6 +1602,13 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, (th->th_off << 2) - sizeof(struct tcphdr), (thflags & TH_SYN) ? TO_SYN : 0); +#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) + if ((tp->t_flags & TF_SIGNATURE) != 0 && + (to.to_flags & TOF_SIGNATURE) == 0) { + TCPSTAT_INC(tcps_sig_err_sigopt); + /* XXX: should drop? 
*/ + } +#endif /* * If echoed timestamp is later than the current time, * fall back to non RFC1323 RTT calculation. Normalize @@ -3420,20 +3395,19 @@ tcp_dooptions(struct tcpopt *to, u_char *cp, int cnt, int flags) (char *)&to->to_tsecr, sizeof(to->to_tsecr)); to->to_tsecr = ntohl(to->to_tsecr); break; -#ifdef TCP_SIGNATURE - /* - * XXX In order to reply to a host which has set the - * TCP_SIGNATURE option in its initial SYN, we have to - * record the fact that the option was observed here - * for the syncache code to perform the correct response. - */ case TCPOPT_SIGNATURE: + /* + * In order to reply to a host which has set the + * TCP_SIGNATURE option in its initial SYN, we have + * to record the fact that the option was observed + * here for the syncache code to perform the correct + * response. + */ if (optlen != TCPOLEN_SIGNATURE) continue; to->to_flags |= TOF_SIGNATURE; to->to_signature = cp + 2; break; -#endif case TCPOPT_SACK_PERMITTED: if (optlen != TCPOLEN_SACK_PERMITTED) continue; @@ -3522,7 +3496,7 @@ tcp_xmit_timer(struct tcpcb *tp, int rtt) TCPSTAT_INC(tcps_rttupdated); tp->t_rttupdated++; - if (tp->t_srtt != 0) { + if ((tp->t_srtt != 0) && (tp->t_rxtshift <= TCP_RTT_INVALIDATE)) { /* * srtt is stored as fixed point with 5 bits after the * binary point (i.e., scaled by 8). The following magic diff --git a/freebsd/sys/netinet/tcp_output.c b/freebsd/sys/netinet/tcp_output.c index b39b0bdf..75b52df5 100644 --- a/freebsd/sys/netinet/tcp_output.c +++ b/freebsd/sys/netinet/tcp_output.c @@ -12,7 +12,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * @@ -92,9 +92,7 @@ __FBSDID("$FreeBSD$"); #include #endif -#ifdef IPSEC -#include -#endif /*IPSEC*/ +#include #include @@ -202,7 +200,7 @@ tcp_output(struct tcpcb *tp) struct tcphdr *th; u_char opt[TCP_MAXOLEN]; unsigned ipoptlen, optlen, hdrlen; -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) unsigned ipsec_optlen = 0; #endif int idle, sendalot; @@ -553,14 +551,23 @@ after_sack_rexmit: * the right thing below to provide length of just ip options and thus * checking for ipoptlen is enough to decide if ip options are present. */ -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) /* * Pre-calculate here as we save another lookup into the darknesses * of IPsec that way and can actually decide if TSO is ok. */ - ipsec_optlen = ipsec_hdrsiz_tcp(tp); +#ifdef INET6 + if (isipv6 && IPSEC_ENABLED(ipv6)) + ipsec_optlen = IPSEC_HDRSIZE(ipv6, tp->t_inpcb); +#ifdef INET + else #endif - +#endif /* INET6 */ +#ifdef INET + if (IPSEC_ENABLED(ipv4)) + ipsec_optlen = IPSEC_HDRSIZE(ipv4, tp->t_inpcb); +#endif /* INET */ +#endif /* IPSEC */ #ifdef INET6 if (isipv6) ipoptlen = ip6_optlen(tp->t_inpcb); @@ -571,7 +578,7 @@ after_sack_rexmit: offsetof(struct ipoption, ipopt_list); else ipoptlen = 0; -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) ipoptlen += ipsec_optlen; #endif @@ -691,6 +698,8 @@ after_sack_rexmit: recwin <= (so->so_rcv.sb_hiwat / 8) || so->so_rcv.sb_hiwat <= 8 * tp->t_maxseg)) goto send; + if (2 * adv >= (int32_t)so->so_rcv.sb_hiwat) + goto send; } dontupdate: @@ -841,8 +850,12 @@ send: to.to_sacks = (u_char *)tp->sackblks; } } -#ifdef TCP_SIGNATURE +#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) /* TCP-MD5 (RFC2385). */ + /* + * Check that TCP_MD5SIG is enabled in tcpcb to + * account the size needed to set this TCP option. 
+ */ if (tp->t_flags & TF_SIGNATURE) to.to_flags |= TOF_SIGNATURE; #endif /* TCP_SIGNATURE */ @@ -1255,25 +1268,36 @@ send: */ tp->snd_up = tp->snd_una; /* drag it along */ -#ifdef TCP_SIGNATURE - if (to.to_flags & TOF_SIGNATURE) { - int sigoff = to.to_signature - opt; - tcp_signature_compute(m, 0, len, optlen, - (u_char *)(th + 1) + sigoff, IPSEC_DIR_OUTBOUND); - } -#endif - /* * Put TCP length in extended header, and then * checksum extended header and data. */ m->m_pkthdr.len = hdrlen + len; /* in6_cksum() need this */ m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); + +#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) + if (to.to_flags & TOF_SIGNATURE) { + /* + * Calculate MD5 signature and put it into the place + * determined before. + * NOTE: since TCP options buffer doesn't point into + * mbuf's data, calculate offset and use it. + */ + if (!TCPMD5_ENABLED() || TCPMD5_OUTPUT(m, th, + (u_char *)(th + 1) + (to.to_signature - opt)) != 0) { + /* + * Do not send segment if the calculation of MD5 + * digest has failed. + */ + goto out; + } + } +#endif #ifdef INET6 if (isipv6) { /* - * ip6_plen is not need to be filled now, and will be filled - * in ip6_output. + * There is no need to fill in ip6_plen right now. + * It will be filled later by ip6_output. */ m->m_pkthdr.csum_flags = CSUM_TCP_IPV6; th->th_sum = in6_cksum_pseudo(ip6, sizeof(struct tcphdr) + @@ -1306,7 +1330,7 @@ send: m->m_pkthdr.tso_segsz = tp->t_maxseg - optlen; } -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) KASSERT(len + hdrlen + ipoptlen - ipsec_optlen == m_length(m, NULL), ("%s: mbuf chain shorter than expected: %d + %u + %u - %u != %u", __func__, len, hdrlen, ipoptlen, ipsec_optlen, m_length(m, NULL))); @@ -1355,9 +1379,6 @@ send: */ #ifdef INET6 if (isipv6) { - struct route_in6 ro; - - bzero(&ro, sizeof(ro)); /* * we separately set hoplimit for every segment, since the * user might want to change the value via setsockopt. 
@@ -1389,13 +1410,13 @@ send: #endif /* TODO: IPv6 IP6TOS_ECT bit on */ - error = ip6_output(m, tp->t_inpcb->in6p_outputopts, &ro, + error = ip6_output(m, tp->t_inpcb->in6p_outputopts, + &tp->t_inpcb->inp_route6, ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0), NULL, NULL, tp->t_inpcb); - if (error == EMSGSIZE && ro.ro_rt != NULL) - mtu = ro.ro_rt->rt_mtu; - RO_RTFREE(&ro); + if (error == EMSGSIZE && tp->t_inpcb->inp_route6.ro_rt != NULL) + mtu = tp->t_inpcb->inp_route6.ro_rt->rt_mtu; } #endif /* INET6 */ #if defined(INET) && defined(INET6) @@ -1565,6 +1586,9 @@ timer: } SOCKBUF_UNLOCK_ASSERT(&so->so_snd); /* Check gotos. */ switch (error) { + case EACCES: + tp->t_softerror = error; + return (0); case EPERM: tp->t_softerror = error; return (error); @@ -1732,7 +1756,6 @@ tcp_addoptions(struct tcpopt *to, u_char *optp) bcopy((u_char *)&to->to_tsecr, optp, sizeof(to->to_tsecr)); optp += sizeof(to->to_tsecr); break; -#ifdef TCP_SIGNATURE case TOF_SIGNATURE: { int siglen = TCPOLEN_SIGNATURE - 2; @@ -1741,8 +1764,10 @@ tcp_addoptions(struct tcpopt *to, u_char *optp) optlen += TCPOLEN_NOP; *optp++ = TCPOPT_NOP; } - if (TCP_MAXOLEN - optlen < TCPOLEN_SIGNATURE) + if (TCP_MAXOLEN - optlen < TCPOLEN_SIGNATURE) { + to->to_flags &= ~TOF_SIGNATURE; continue; + } optlen += TCPOLEN_SIGNATURE; *optp++ = TCPOPT_SIGNATURE; *optp++ = TCPOLEN_SIGNATURE; @@ -1751,7 +1776,6 @@ tcp_addoptions(struct tcpopt *to, u_char *optp) *optp++ = 0; break; } -#endif case TOF_SACK: { int sackblks = 0; diff --git a/freebsd/sys/netinet/tcp_reass.c b/freebsd/sys/netinet/tcp_reass.c index 49184a5f..ba973e5c 100644 --- a/freebsd/sys/netinet/tcp_reass.c +++ b/freebsd/sys/netinet/tcp_reass.c @@ -12,7 +12,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/netinet/tcp_seq.h b/freebsd/sys/netinet/tcp_seq.h index 666cf603..cfc1ccf7 100644 --- a/freebsd/sys/netinet/tcp_seq.h +++ b/freebsd/sys/netinet/tcp_seq.h @@ -10,7 +10,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/netinet/tcp_subr.c b/freebsd/sys/netinet/tcp_subr.c index ae50bb3e..e4e4ca6a 100644 --- a/freebsd/sys/netinet/tcp_subr.c +++ b/freebsd/sys/netinet/tcp_subr.c @@ -12,7 +12,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* @@ -124,15 +124,7 @@ __FBSDID("$FreeBSD$"); #include #endif -#ifdef IPSEC -#include -#include -#ifdef INET6 -#include -#endif -#include -#include -#endif /*IPSEC*/ +#include #include #include @@ -239,12 +231,6 @@ static int tcp_soreceive_stream; SYSCTL_INT(_net_inet_tcp, OID_AUTO, soreceive_stream, CTLFLAG_RDTUN, &tcp_soreceive_stream, 0, "Using soreceive_stream for TCP sockets"); -#ifdef TCP_SIGNATURE -static int tcp_sig_checksigs = 1; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, signature_verify_input, CTLFLAG_RW, - &tcp_sig_checksigs, 0, "Verify RFC2385 digests on inbound traffic"); -#endif - VNET_DEFINE(uma_zone_t, sack_hole_zone); #define V_sack_hole_zone VNET(sack_hole_zone) @@ -685,6 +671,10 @@ tcp_init(void) V_sack_hole_zone = uma_zcreate("sackhole", sizeof(struct sackhole), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); +#ifdef TCP_RFC7413 + tcp_fastopen_init(); +#endif + /* Skip initialization of globals for non-default instances. */ if (!IS_DEFAULT_VNET(curvnet)) return; @@ -738,10 +728,6 @@ tcp_init(void) #ifdef TCPPCAP tcp_pcap_init(); #endif - -#ifdef TCP_RFC7413 - tcp_fastopen_init(); -#endif } #ifdef VIMAGE @@ -1070,12 +1056,11 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m, to.to_tsecr = tp->ts_recent; to.to_flags |= TOF_TS; } -#ifdef TCP_SIGNATURE +#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) /* TCP-MD5 (RFC2385). */ if (tp->t_flags & TF_SIGNATURE) to.to_flags |= TOF_SIGNATURE; #endif - /* Add the options. 
*/ tlen += optlen = tcp_addoptions(&to, optp); @@ -1131,10 +1116,13 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m, nth->th_win = htons((u_short)win); nth->th_urp = 0; -#ifdef TCP_SIGNATURE +#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) if (to.to_flags & TOF_SIGNATURE) { - tcp_signature_compute(m, 0, 0, optlen, to.to_signature, - IPSEC_DIR_OUTBOUND); + if (!TCPMD5_ENABLED() || + TCPMD5_OUTPUT(m, nth, to.to_signature) != 0) { + m_freem(m); + return; + } } #endif @@ -1791,30 +1779,8 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS) INP_RLOCK(inp); if (inp->inp_gencnt <= gencnt) { struct xtcpcb xt; - void *inp_ppcb; - - bzero(&xt, sizeof(xt)); - xt.xt_len = sizeof xt; - /* XXX should avoid extra copy */ - bcopy(inp, &xt.xt_inp, sizeof *inp); - inp_ppcb = inp->inp_ppcb; - if (inp_ppcb == NULL) - bzero((char *) &xt.xt_tp, sizeof xt.xt_tp); - else if (inp->inp_flags & INP_TIMEWAIT) { - bzero((char *) &xt.xt_tp, sizeof xt.xt_tp); - xt.xt_tp.t_state = TCPS_TIME_WAIT; - } else { - bcopy(inp_ppcb, &xt.xt_tp, sizeof xt.xt_tp); - if (xt.xt_tp.t_timers) - tcp_timer_to_xtimer(&xt.xt_tp, xt.xt_tp.t_timers, &xt.xt_timer); - } - if (inp->inp_socket != NULL) - sotoxsocket(inp->inp_socket, &xt.xt_socket); - else { - bzero(&xt.xt_socket, sizeof xt.xt_socket); - xt.xt_socket.xso_protocol = IPPROTO_TCP; - } - xt.xt_inp.inp_gencnt = inp->inp_gencnt; + + tcp_inptoxtp(inp, &xt); INP_RUNLOCK(inp); error = SYSCTL_OUT(req, &xt, sizeof xt); } else @@ -2507,7 +2473,7 @@ tcp_maxseg(const struct tcpcb *tp) optlen = TCPOLEN_TSTAMP_APPA; else optlen = 0; -#ifdef TCP_SIGNATURE +#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) if (tp->t_flags & TF_SIGNATURE) optlen += PAD(TCPOLEN_SIGNATURE); #endif @@ -2523,7 +2489,7 @@ tcp_maxseg(const struct tcpcb *tp) optlen = PAD(TCPOLEN_MAXSEG); if (tp->t_flags & TF_REQ_SCALE) optlen += PAD(TCPOLEN_WINDOW); -#ifdef TCP_SIGNATURE +#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) if (tp->t_flags & TF_SIGNATURE) optlen += 
PAD(TCPOLEN_SIGNATURE); #endif @@ -2535,343 +2501,6 @@ tcp_maxseg(const struct tcpcb *tp) return (tp->t_maxseg - optlen); } -#ifdef IPSEC -/* compute ESP/AH header size for TCP, including outer IP header. */ -size_t -ipsec_hdrsiz_tcp(struct tcpcb *tp) -{ - struct inpcb *inp; - struct mbuf *m; - size_t hdrsiz; - struct ip *ip; -#ifdef INET6 - struct ip6_hdr *ip6; -#endif - struct tcphdr *th; - - if ((tp == NULL) || ((inp = tp->t_inpcb) == NULL) || - (!key_havesp(IPSEC_DIR_OUTBOUND))) - return (0); - m = m_gethdr(M_NOWAIT, MT_DATA); - if (!m) - return (0); - -#ifdef INET6 - if ((inp->inp_vflag & INP_IPV6) != 0) { - ip6 = mtod(m, struct ip6_hdr *); - th = (struct tcphdr *)(ip6 + 1); - m->m_pkthdr.len = m->m_len = - sizeof(struct ip6_hdr) + sizeof(struct tcphdr); - tcpip_fillheaders(inp, ip6, th); - hdrsiz = ipsec_hdrsiz(m, IPSEC_DIR_OUTBOUND, inp); - } else -#endif /* INET6 */ - { - ip = mtod(m, struct ip *); - th = (struct tcphdr *)(ip + 1); - m->m_pkthdr.len = m->m_len = sizeof(struct tcpiphdr); - tcpip_fillheaders(inp, ip, th); - hdrsiz = ipsec_hdrsiz(m, IPSEC_DIR_OUTBOUND, inp); - } - - m_free(m); - return (hdrsiz); -} -#endif /* IPSEC */ - -#ifdef TCP_SIGNATURE -/* - * Callback function invoked by m_apply() to digest TCP segment data - * contained within an mbuf chain. - */ -static int -tcp_signature_apply(void *fstate, void *data, u_int len) -{ - - MD5Update(fstate, (u_char *)data, len); - return (0); -} - -/* - * XXX The key is retrieved from the system's PF_KEY SADB, by keying a - * search with the destination IP address, and a 'magic SPI' to be - * determined by the application. This is hardcoded elsewhere to 1179 -*/ -struct secasvar * -tcp_get_sav(struct mbuf *m, u_int direction) -{ - union sockaddr_union dst; - struct secasvar *sav; - struct ip *ip; -#ifdef INET6 - struct ip6_hdr *ip6; - char ip6buf[INET6_ADDRSTRLEN]; -#endif - - /* Extract the destination from the IP header in the mbuf. 
*/ - bzero(&dst, sizeof(union sockaddr_union)); - ip = mtod(m, struct ip *); -#ifdef INET6 - ip6 = NULL; /* Make the compiler happy. */ -#endif - switch (ip->ip_v) { -#ifdef INET - case IPVERSION: - dst.sa.sa_len = sizeof(struct sockaddr_in); - dst.sa.sa_family = AF_INET; - dst.sin.sin_addr = (direction == IPSEC_DIR_INBOUND) ? - ip->ip_src : ip->ip_dst; - break; -#endif -#ifdef INET6 - case (IPV6_VERSION >> 4): - ip6 = mtod(m, struct ip6_hdr *); - dst.sa.sa_len = sizeof(struct sockaddr_in6); - dst.sa.sa_family = AF_INET6; - dst.sin6.sin6_addr = (direction == IPSEC_DIR_INBOUND) ? - ip6->ip6_src : ip6->ip6_dst; - break; -#endif - default: - return (NULL); - /* NOTREACHED */ - break; - } - - /* Look up an SADB entry which matches the address of the peer. */ - sav = KEY_ALLOCSA(&dst, IPPROTO_TCP, htonl(TCP_SIG_SPI)); - if (sav == NULL) { - ipseclog((LOG_ERR, "%s: SADB lookup failed for %s\n", __func__, - (ip->ip_v == IPVERSION) ? inet_ntoa(dst.sin.sin_addr) : -#ifdef INET6 - (ip->ip_v == (IPV6_VERSION >> 4)) ? - ip6_sprintf(ip6buf, &dst.sin6.sin6_addr) : -#endif - "(unsupported)")); - } - - return (sav); -} - -/* - * Compute TCP-MD5 hash of a TCP segment. (RFC2385) - * - * Parameters: - * m pointer to head of mbuf chain - * len length of TCP segment data, excluding options - * optlen length of TCP segment options - * buf pointer to storage for computed MD5 digest - * sav pointer to security assosiation - * - * We do this over ip, tcphdr, segment data, and the key in the SADB. - * When called from tcp_input(), we can be sure that th_sum has been - * zeroed out and verified already. - * - * Releases reference to SADB key before return. - * - * Return 0 if successful, otherwise return -1. 
- * - */ -int -tcp_signature_do_compute(struct mbuf *m, int len, int optlen, - u_char *buf, struct secasvar *sav) -{ -#ifdef INET - struct ippseudo ippseudo; -#endif - MD5_CTX ctx; - int doff; - struct ip *ip; -#ifdef INET - struct ipovly *ipovly; -#endif - struct tcphdr *th; -#ifdef INET6 - struct ip6_hdr *ip6; - struct in6_addr in6; - uint32_t plen; - uint16_t nhdr; -#endif - u_short savecsum; - - KASSERT(m != NULL, ("NULL mbuf chain")); - KASSERT(buf != NULL, ("NULL signature pointer")); - - /* Extract the destination from the IP header in the mbuf. */ - ip = mtod(m, struct ip *); -#ifdef INET6 - ip6 = NULL; /* Make the compiler happy. */ -#endif - - MD5Init(&ctx); - /* - * Step 1: Update MD5 hash with IP(v6) pseudo-header. - * - * XXX The ippseudo header MUST be digested in network byte order, - * or else we'll fail the regression test. Assume all fields we've - * been doing arithmetic on have been in host byte order. - * XXX One cannot depend on ipovly->ih_len here. When called from - * tcp_output(), the underlying ip_len member has not yet been set. - */ - switch (ip->ip_v) { -#ifdef INET - case IPVERSION: - ipovly = (struct ipovly *)ip; - ippseudo.ippseudo_src = ipovly->ih_src; - ippseudo.ippseudo_dst = ipovly->ih_dst; - ippseudo.ippseudo_pad = 0; - ippseudo.ippseudo_p = IPPROTO_TCP; - ippseudo.ippseudo_len = htons(len + sizeof(struct tcphdr) + - optlen); - MD5Update(&ctx, (char *)&ippseudo, sizeof(struct ippseudo)); - - th = (struct tcphdr *)((u_char *)ip + sizeof(struct ip)); - doff = sizeof(struct ip) + sizeof(struct tcphdr) + optlen; - break; -#endif -#ifdef INET6 - /* - * RFC 2385, 2.0 Proposal - * For IPv6, the pseudo-header is as described in RFC 2460, namely the - * 128-bit source IPv6 address, 128-bit destination IPv6 address, zero- - * extended next header value (to form 32 bits), and 32-bit segment - * length. - * Note: Upper-Layer Packet Length comes before Next Header. 
- */ - case (IPV6_VERSION >> 4): - ip6 = mtod(m, struct ip6_hdr *); - in6 = ip6->ip6_src; - in6_clearscope(&in6); - MD5Update(&ctx, (char *)&in6, sizeof(struct in6_addr)); - in6 = ip6->ip6_dst; - in6_clearscope(&in6); - MD5Update(&ctx, (char *)&in6, sizeof(struct in6_addr)); - plen = htonl(len + sizeof(struct tcphdr) + optlen); - MD5Update(&ctx, (char *)&plen, sizeof(uint32_t)); - nhdr = 0; - MD5Update(&ctx, (char *)&nhdr, sizeof(uint8_t)); - MD5Update(&ctx, (char *)&nhdr, sizeof(uint8_t)); - MD5Update(&ctx, (char *)&nhdr, sizeof(uint8_t)); - nhdr = IPPROTO_TCP; - MD5Update(&ctx, (char *)&nhdr, sizeof(uint8_t)); - - th = (struct tcphdr *)((u_char *)ip6 + sizeof(struct ip6_hdr)); - doff = sizeof(struct ip6_hdr) + sizeof(struct tcphdr) + optlen; - break; -#endif - default: - KEY_FREESAV(&sav); - return (-1); - /* NOTREACHED */ - break; - } - - - /* - * Step 2: Update MD5 hash with TCP header, excluding options. - * The TCP checksum must be set to zero. - */ - savecsum = th->th_sum; - th->th_sum = 0; - MD5Update(&ctx, (char *)th, sizeof(struct tcphdr)); - th->th_sum = savecsum; - - /* - * Step 3: Update MD5 hash with TCP segment data. - * Use m_apply() to avoid an early m_pullup(). - */ - if (len > 0) - m_apply(m, doff, len, tcp_signature_apply, &ctx); - - /* - * Step 4: Update MD5 hash with shared secret. - */ - MD5Update(&ctx, sav->key_auth->key_data, _KEYLEN(sav->key_auth)); - MD5Final(buf, &ctx); - - key_sa_recordxfer(sav, m); - KEY_FREESAV(&sav); - return (0); -} - -/* - * Compute TCP-MD5 hash of a TCP segment. (RFC2385) - * - * Return 0 if successful, otherwise return -1. - */ -int -tcp_signature_compute(struct mbuf *m, int _unused, int len, int optlen, - u_char *buf, u_int direction) -{ - struct secasvar *sav; - - if ((sav = tcp_get_sav(m, direction)) == NULL) - return (-1); - - return (tcp_signature_do_compute(m, len, optlen, buf, sav)); -} - -/* - * Verify the TCP-MD5 hash of a TCP segment. 
(RFC2385) - * - * Parameters: - * m pointer to head of mbuf chain - * len length of TCP segment data, excluding options - * optlen length of TCP segment options - * buf pointer to storage for computed MD5 digest - * direction direction of flow (IPSEC_DIR_INBOUND or OUTBOUND) - * - * Return 1 if successful, otherwise return 0. - */ -int -tcp_signature_verify(struct mbuf *m, int off0, int tlen, int optlen, - struct tcpopt *to, struct tcphdr *th, u_int tcpbflag) -{ - char tmpdigest[TCP_SIGLEN]; - - if (tcp_sig_checksigs == 0) - return (1); - if ((tcpbflag & TF_SIGNATURE) == 0) { - if ((to->to_flags & TOF_SIGNATURE) != 0) { - - /* - * If this socket is not expecting signature but - * the segment contains signature just fail. - */ - TCPSTAT_INC(tcps_sig_err_sigopt); - TCPSTAT_INC(tcps_sig_rcvbadsig); - return (0); - } - - /* Signature is not expected, and not present in segment. */ - return (1); - } - - /* - * If this socket is expecting signature but the segment does not - * contain any just fail. - */ - if ((to->to_flags & TOF_SIGNATURE) == 0) { - TCPSTAT_INC(tcps_sig_err_nosigopt); - TCPSTAT_INC(tcps_sig_rcvbadsig); - return (0); - } - if (tcp_signature_compute(m, off0, tlen, optlen, &tmpdigest[0], - IPSEC_DIR_INBOUND) == -1) { - TCPSTAT_INC(tcps_sig_err_buildsig); - TCPSTAT_INC(tcps_sig_rcvbadsig); - return (0); - } - - if (bcmp(to->to_signature, &tmpdigest[0], TCP_SIGLEN) != 0) { - TCPSTAT_INC(tcps_sig_rcvbadsig); - return (0); - } - TCPSTAT_INC(tcps_sig_rcvgoodsig); - return (1); -} -#endif /* TCP_SIGNATURE */ - static int sysctl_drop(SYSCTL_HANDLER_ARGS) { @@ -3120,3 +2749,53 @@ tcp_state_change(struct tcpcb *tp, int newstate) tp->t_state = newstate; TCP_PROBE6(state__change, NULL, tp, NULL, tp, NULL, pstate); } + +/* + * Create an external-format (``xtcpcb'') structure using the information in + * the kernel-format tcpcb structure pointed to by tp. 
This is done to + * reduce the spew of irrelevant information over this interface, to isolate + * user code from changes in the kernel structure, and potentially to provide + * information-hiding if we decide that some of this information should be + * hidden from users. + */ +void +tcp_inptoxtp(const struct inpcb *inp, struct xtcpcb *xt) +{ + struct tcpcb *tp = intotcpcb(inp); + sbintime_t now; + + if (inp->inp_flags & INP_TIMEWAIT) { + bzero(xt, sizeof(struct xtcpcb)); + xt->t_state = TCPS_TIME_WAIT; + } else { + xt->t_state = tp->t_state; + xt->t_flags = tp->t_flags; + xt->t_sndzerowin = tp->t_sndzerowin; + xt->t_sndrexmitpack = tp->t_sndrexmitpack; + xt->t_rcvoopack = tp->t_rcvoopack; + + now = getsbinuptime(); +#define COPYTIMER(ttt) do { \ + if (callout_active(&tp->t_timers->ttt)) \ + xt->ttt = (tp->t_timers->ttt.c_time - now) / \ + SBT_1MS; \ + else \ + xt->ttt = 0; \ +} while (0) + COPYTIMER(tt_delack); + COPYTIMER(tt_rexmt); + COPYTIMER(tt_persist); + COPYTIMER(tt_keep); + COPYTIMER(tt_2msl); +#undef COPYTIMER + xt->t_rcvtime = 1000 * (ticks - tp->t_rcvtime) / hz; + + bcopy(tp->t_fb->tfb_tcp_block_name, xt->xt_stack, + TCP_FUNCTION_NAME_LEN_MAX); + } + + xt->xt_len = sizeof(struct xtcpcb); + in_pcbtoxinpcb(inp, &xt->xt_inp); + if (inp->inp_socket == NULL) + xt->xt_inp.xi_socket.xso_protocol = IPPROTO_TCP; +} diff --git a/freebsd/sys/netinet/tcp_syncache.c b/freebsd/sys/netinet/tcp_syncache.c index 6d05be85..78303625 100644 --- a/freebsd/sys/netinet/tcp_syncache.c +++ b/freebsd/sys/netinet/tcp_syncache.c @@ -98,13 +98,7 @@ __FBSDID("$FreeBSD$"); #include #endif -#ifdef IPSEC -#include -#ifdef INET6 -#include -#endif -#include -#endif /*IPSEC*/ +#include #include @@ -122,6 +116,14 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, syncookies_only, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_syncookiesonly), 0, "Use only TCP SYN cookies"); +static VNET_DEFINE(int, functions_inherit_listen_socket_stack) = 1; +#define V_functions_inherit_listen_socket_stack \ + 
VNET(functions_inherit_listen_socket_stack) +SYSCTL_INT(_net_inet_tcp, OID_AUTO, functions_inherit_listen_socket_stack, + CTLFLAG_VNET | CTLFLAG_RW, + &VNET_NAME(functions_inherit_listen_socket_stack), 0, + "Inherit listen socket's stack"); + #ifdef TCP_OFFLOAD #define ADDED_BY_TOE(sc) ((sc)->sc_tod != NULL) #endif @@ -738,11 +740,6 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m) INP_HASH_WUNLOCK(&V_tcbinfo); goto abort; } -#ifdef IPSEC - /* Copy old policy into new socket's. */ - if (ipsec_copy_policy(sotoinpcb(lso)->inp_sp, inp->inp_sp)) - printf("syncache_socket: could not copy policy\n"); -#endif #ifdef INET6 if (sc->sc_inc.inc_flags & INC_ISIPV6) { struct inpcb *oinp = sotoinpcb(lso); @@ -832,6 +829,11 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m) } } #endif /* INET */ +#if defined(IPSEC) || defined(IPSEC_SUPPORT) + /* Copy old policy into new socket's. */ + if (ipsec_copy_pcbpolicy(sotoinpcb(lso), inp) != 0) + printf("syncache_socket: could not copy policy\n"); +#endif INP_HASH_WUNLOCK(&V_tcbinfo); tp = intotcpcb(inp); tcp_state_change(tp, TCPS_SYN_RECEIVED); @@ -840,7 +842,7 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m) tcp_rcvseqinit(tp); tcp_sendseqinit(tp); blk = sototcpcb(lso)->t_fb; - if (blk != tp->t_fb) { + if (V_functions_inherit_listen_socket_stack && blk != tp->t_fb) { /* * Our parents t_fb was not the default, * we need to release our ref on tp->t_fb and @@ -882,7 +884,7 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m) tp->ts_recent_age = tcp_ts_getticks(); tp->ts_offset = sc->sc_tsoff; } -#ifdef TCP_SIGNATURE +#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) if (sc->sc_flags & SCF_SIGNATURE) tp->t_flags |= TF_SIGNATURE; #endif @@ -1006,7 +1008,57 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, "(probably spoofed)\n", s, __func__); goto failed; } +#if defined(IPSEC_SUPPORT) || 
defined(TCP_SIGNATURE) + /* If received ACK has MD5 signature, check it. */ + if ((to->to_flags & TOF_SIGNATURE) != 0 && + (!TCPMD5_ENABLED() || + TCPMD5_INPUT(m, th, to->to_signature) != 0)) { + /* Drop the ACK. */ + if ((s = tcp_log_addrs(inc, th, NULL, NULL))) { + log(LOG_DEBUG, "%s; %s: Segment rejected, " + "MD5 signature doesn't match.\n", + s, __func__); + free(s, M_TCPLOG); + } + TCPSTAT_INC(tcps_sig_err_sigopt); + return (-1); /* Do not send RST */ + } +#endif /* TCP_SIGNATURE */ } else { +#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) + /* + * If listening socket requested TCP digests, check that + * received ACK has signature and it is correct. + * If not, drop the ACK and leave sc entry in th cache, + * because SYN was received with correct signature. + */ + if (sc->sc_flags & SCF_SIGNATURE) { + if ((to->to_flags & TOF_SIGNATURE) == 0) { + /* No signature */ + TCPSTAT_INC(tcps_sig_err_nosigopt); + SCH_UNLOCK(sch); + if ((s = tcp_log_addrs(inc, th, NULL, NULL))) { + log(LOG_DEBUG, "%s; %s: Segment " + "rejected, MD5 signature wasn't " + "provided.\n", s, __func__); + free(s, M_TCPLOG); + } + return (-1); /* Do not send RST */ + } + if (!TCPMD5_ENABLED() || + TCPMD5_INPUT(m, th, to->to_signature) != 0) { + /* Doesn't match or no SA */ + SCH_UNLOCK(sch); + if ((s = tcp_log_addrs(inc, th, NULL, NULL))) { + log(LOG_DEBUG, "%s; %s: Segment " + "rejected, MD5 signature doesn't " + "match.\n", s, __func__); + free(s, M_TCPLOG); + } + return (-1); /* Do not send RST */ + } + } +#endif /* TCP_SIGNATURE */ /* * Pull out the entry to unlock the bucket row. * @@ -1276,6 +1328,22 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, ipopts = NULL; #endif +#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) + /* + * If listening socket requested TCP digests, check that received + * SYN has signature and it is correct. If signature doesn't match + * or TCP_SIGNATURE support isn't enabled, drop the packet. 
+ */ + if (ltflags & TF_SIGNATURE) { + if ((to->to_flags & TOF_SIGNATURE) == 0) { + TCPSTAT_INC(tcps_sig_err_nosigopt); + goto done; + } + if (!TCPMD5_ENABLED() || + TCPMD5_INPUT(m, th, to->to_signature) != 0) + goto done; + } +#endif /* TCP_SIGNATURE */ /* * See if we already have an entry for this connection. * If we do, resend the SYN,ACK, and reset the retransmit timer. @@ -1451,15 +1519,15 @@ skip_alloc: sc->sc_flags |= SCF_WINSCALE; } } -#ifdef TCP_SIGNATURE +#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) /* - * If listening socket requested TCP digests, OR received SYN - * contains the option, flag this in the syncache so that - * syncache_respond() will do the right thing with the SYN+ACK. + * If listening socket requested TCP digests, flag this in the + * syncache so that syncache_respond() will do the right thing + * with the SYN+ACK. */ - if (to->to_flags & TOF_SIGNATURE || ltflags & TF_SIGNATURE) + if (ltflags & TF_SIGNATURE) sc->sc_flags |= SCF_SIGNATURE; -#endif +#endif /* TCP_SIGNATURE */ if (to->to_flags & TOF_SACKPERM) sc->sc_flags |= SCF_SACK; if (to->to_flags & TOF_MSS) @@ -1550,10 +1618,6 @@ syncache_respond(struct syncache *sc, struct syncache_head *sch, int locked, #ifdef INET6 struct ip6_hdr *ip6 = NULL; #endif -#ifdef TCP_SIGNATURE - struct secasvar *sav; -#endif - hlen = #ifdef INET6 (sc->sc_inc.inc_flags & INC_ISIPV6) ? sizeof(struct ip6_hdr) : @@ -1662,32 +1726,10 @@ syncache_respond(struct syncache *sc, struct syncache_head *sch, int locked, } if (sc->sc_flags & SCF_SACK) to.to_flags |= TOF_SACKPERM; -#ifdef TCP_SIGNATURE - sav = NULL; - if (sc->sc_flags & SCF_SIGNATURE) { - sav = tcp_get_sav(m, IPSEC_DIR_OUTBOUND); - if (sav != NULL) - to.to_flags |= TOF_SIGNATURE; - else { - - /* - * We've got SCF_SIGNATURE flag - * inherited from listening socket, - * but no SADB key for given source - * address. Assume signature is not - * required and remove signature flag - * instead of silently dropping - * connection. 
- */ - if (locked == 0) - SCH_LOCK(sch); - sc->sc_flags &= ~SCF_SIGNATURE; - if (locked == 0) - SCH_UNLOCK(sch); - } - } +#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) + if (sc->sc_flags & SCF_SIGNATURE) + to.to_flags |= TOF_SIGNATURE; #endif - #ifdef TCP_RFC7413 if (sc->sc_tfo_cookie) { to.to_flags |= TOF_FASTOPEN; @@ -1703,18 +1745,25 @@ syncache_respond(struct syncache *sc, struct syncache_head *sch, int locked, th->th_off = (sizeof(struct tcphdr) + optlen) >> 2; m->m_len += optlen; m->m_pkthdr.len += optlen; - -#ifdef TCP_SIGNATURE - if (sc->sc_flags & SCF_SIGNATURE) - tcp_signature_do_compute(m, 0, optlen, - to.to_signature, sav); -#endif #ifdef INET6 if (sc->sc_inc.inc_flags & INC_ISIPV6) ip6->ip6_plen = htons(ntohs(ip6->ip6_plen) + optlen); else #endif ip->ip_len = htons(ntohs(ip->ip_len) + optlen); +#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) + if (sc->sc_flags & SCF_SIGNATURE) { + KASSERT(to.to_flags & TOF_SIGNATURE, + ("tcp_addoptions() didn't set tcp_signature")); + + /* NOTE: to.to_signature is inside of mbuf */ + if (!TCPMD5_ENABLED() || + TCPMD5_OUTPUT(m, th, to.to_signature) != 0) { + m_freem(m); + return (EACCES); + } + } +#endif } else optlen = 0; @@ -2178,13 +2227,13 @@ syncache_pcblist(struct sysctl_req *req, int max_pcbs, int *pcbs_exported) xt.xt_inp.inp_vflag = INP_IPV6; else xt.xt_inp.inp_vflag = INP_IPV4; - bcopy(&sc->sc_inc, &xt.xt_inp.inp_inc, sizeof (struct in_conninfo)); - xt.xt_tp.t_inpcb = &xt.xt_inp; - xt.xt_tp.t_state = TCPS_SYN_RECEIVED; - xt.xt_socket.xso_protocol = IPPROTO_TCP; - xt.xt_socket.xso_len = sizeof (struct xsocket); - xt.xt_socket.so_type = SOCK_STREAM; - xt.xt_socket.so_state = SS_ISCONNECTING; + bcopy(&sc->sc_inc, &xt.xt_inp.inp_inc, + sizeof (struct in_conninfo)); + xt.t_state = TCPS_SYN_RECEIVED; + xt.xt_inp.xi_socket.xso_protocol = IPPROTO_TCP; + xt.xt_inp.xi_socket.xso_len = sizeof (struct xsocket); + xt.xt_inp.xi_socket.so_type = SOCK_STREAM; + xt.xt_inp.xi_socket.so_state = SS_ISCONNECTING; error 
= SYSCTL_OUT(req, &xt, sizeof xt); if (error) { SCH_UNLOCK(sch); diff --git a/freebsd/sys/netinet/tcp_syncache.h b/freebsd/sys/netinet/tcp_syncache.h index 6b12c13a..2c8c5b00 100644 --- a/freebsd/sys/netinet/tcp_syncache.h +++ b/freebsd/sys/netinet/tcp_syncache.h @@ -10,7 +10,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/netinet/tcp_timer.c b/freebsd/sys/netinet/tcp_timer.c index 89b61ad8..4743b4fd 100644 --- a/freebsd/sys/netinet/tcp_timer.c +++ b/freebsd/sys/netinet/tcp_timer.c @@ -12,7 +12,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * @@ -847,20 +847,16 @@ tcp_timer_rexmt(void * xtp) (tp->t_rxtshift == 3)) tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP|TF_SACK_PERMIT); /* - * If we backed off this far, our srtt estimate is probably bogus. - * Clobber it so we'll take the next rtt measurement as our srtt; - * move the current srtt into rttvar to keep the current - * retransmit times until then. + * If we backed off this far, notify the L3 protocol that we're having + * connection problems. 
*/ - if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) { + if (tp->t_rxtshift > TCP_RTT_INVALIDATE) { #ifdef INET6 if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0) in6_losing(tp->t_inpcb); else #endif in_losing(tp->t_inpcb); - tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT); - tp->t_srtt = 0; } tp->snd_nxt = tp->snd_una; tp->snd_recover = tp->snd_max; @@ -1012,28 +1008,3 @@ tcp_timer_stop(struct tcpcb *tp, uint32_t timer_type) tp->t_timers->tt_draincnt++; } } - -#define ticks_to_msecs(t) (1000*(t) / hz) - -void -tcp_timer_to_xtimer(struct tcpcb *tp, struct tcp_timer *timer, - struct xtcp_timer *xtimer) -{ - sbintime_t now; - - bzero(xtimer, sizeof(*xtimer)); - if (timer == NULL) - return; - now = getsbinuptime(); - if (callout_active(&timer->tt_delack)) - xtimer->tt_delack = (timer->tt_delack.c_time - now) / SBT_1MS; - if (callout_active(&timer->tt_rexmt)) - xtimer->tt_rexmt = (timer->tt_rexmt.c_time - now) / SBT_1MS; - if (callout_active(&timer->tt_persist)) - xtimer->tt_persist = (timer->tt_persist.c_time - now) / SBT_1MS; - if (callout_active(&timer->tt_keep)) - xtimer->tt_keep = (timer->tt_keep.c_time - now) / SBT_1MS; - if (callout_active(&timer->tt_2msl)) - xtimer->tt_2msl = (timer->tt_2msl.c_time - now) / SBT_1MS; - xtimer->t_rcvtime = ticks_to_msecs(ticks - tp->t_rcvtime); -} diff --git a/freebsd/sys/netinet/tcp_timer.h b/freebsd/sys/netinet/tcp_timer.h index bb78062d..f14f929a 100644 --- a/freebsd/sys/netinet/tcp_timer.h +++ b/freebsd/sys/netinet/tcp_timer.h @@ -10,7 +10,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* @@ -119,6 +119,13 @@ #define TCPTV_DELACK ( hz/10 ) /* 100ms timeout */ +/* + * If we exceed this number of retransmits for a single segment, we'll consider + * the current srtt measurement no longer valid and will recalculate from + * scratch starting with the next ACK. + */ +#define TCP_RTT_INVALIDATE (TCP_MAXRXTSHIFT / 4) + #ifdef TCPTIMERS static const char *tcptimers[] = { "REXMT", "PERSIST", "KEEP", "2MSL", "DELACK" }; @@ -203,8 +210,6 @@ void tcp_timer_keep(void *xtp); void tcp_timer_persist(void *xtp); void tcp_timer_rexmt(void *xtp); void tcp_timer_delack(void *xtp); -void tcp_timer_to_xtimer(struct tcpcb *tp, struct tcp_timer *timer, - struct xtcp_timer *xtimer); #endif /* _KERNEL */ diff --git a/freebsd/sys/netinet/tcp_timewait.c b/freebsd/sys/netinet/tcp_timewait.c index 7eb05462..8ff6e63b 100644 --- a/freebsd/sys/netinet/tcp_timewait.c +++ b/freebsd/sys/netinet/tcp_timewait.c @@ -12,7 +12,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/netinet/tcp_usrreq.c b/freebsd/sys/netinet/tcp_usrreq.c index 436f30f8..314bc954 100644 --- a/freebsd/sys/netinet/tcp_usrreq.c +++ b/freebsd/sys/netinet/tcp_usrreq.c @@ -18,7 +18,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * @@ -43,6 +43,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -103,6 +104,7 @@ __FBSDID("$FreeBSD$"); #ifdef TCP_OFFLOAD #include #endif +#include /* * TCP protocol interface to socket abstraction. @@ -1554,21 +1556,17 @@ tcp_default_ctloutput(struct socket *so, struct sockopt *sopt, struct inpcb *inp switch (sopt->sopt_dir) { case SOPT_SET: switch (sopt->sopt_name) { -#ifdef TCP_SIGNATURE +#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) case TCP_MD5SIG: - INP_WUNLOCK(inp); - error = sooptcopyin(sopt, &optval, sizeof optval, - sizeof optval); + if (!TCPMD5_ENABLED()) { + INP_WUNLOCK(inp); + return (ENOPROTOOPT); + } + error = TCPMD5_PCBCTL(inp, sopt); if (error) return (error); - - INP_WLOCK_RECHECK(inp); - if (optval > 0) - tp->t_flags |= TF_SIGNATURE; - else - tp->t_flags &= ~TF_SIGNATURE; goto unlock_and_done; -#endif /* TCP_SIGNATURE */ +#endif /* IPSEC */ case TCP_NODELAY: case TCP_NOOPT: @@ -1794,11 +1792,13 @@ unlock_and_done: case SOPT_GET: tp = intotcpcb(inp); switch (sopt->sopt_name) { -#ifdef TCP_SIGNATURE +#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) case TCP_MD5SIG: - optval = (tp->t_flags & TF_SIGNATURE) ? 1 : 0; - INP_WUNLOCK(inp); - error = sooptcopyout(sopt, &optval, sizeof optval); + if (!TCPMD5_ENABLED()) { + INP_WUNLOCK(inp); + return (ENOPROTOOPT); + } + error = TCPMD5_PCBCTL(inp, sopt); break; #endif diff --git a/freebsd/sys/netinet/tcp_var.h b/freebsd/sys/netinet/tcp_var.h index f4ea246b..5705e553 100644 --- a/freebsd/sys/netinet/tcp_var.h +++ b/freebsd/sys/netinet/tcp_var.h @@ -10,7 +10,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. 
Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * @@ -39,15 +39,9 @@ #ifdef _KERNEL #include #include +#endif -/* - * Kernel variables for tcp. - */ -VNET_DECLARE(int, tcp_do_rfc1323); -#define V_tcp_do_rfc1323 VNET(tcp_do_rfc1323) - -#endif /* _KERNEL */ - +#if defined(_KERNEL) || defined(_WANT_TCPCB) /* TCP segment queue entry */ struct tseg_qent { LIST_ENTRY(tseg_qent) tqe_q; @@ -83,90 +77,12 @@ struct sackhint { uint64_t _pad[1]; /* TBD */ }; -struct tcptemp { - u_char tt_ipgen[40]; /* the size must be of max ip header, now IPv6 */ - struct tcphdr tt_t; -}; - -#define tcp6cb tcpcb /* for KAME src sync over BSD*'s */ - -/* - * TODO: We yet need to brave plowing in - * to tcp_input() and the pru_usrreq() block. - * Right now these go to the old standards which - * are somewhat ok, but in the long term may - * need to be changed. If we do tackle tcp_input() - * then we need to get rid of the tcp_do_segment() - * function below. - */ -/* Flags for tcp functions */ -#define TCP_FUNC_BEING_REMOVED 0x01 /* Can no longer be referenced */ -struct tcpcb; -struct inpcb; -struct sockopt; -struct socket; - -/* - * If defining the optional tcp_timers, in the - * tfb_tcp_timer_stop call you must use the - * callout_async_drain() function with the - * tcp_timer_discard callback. You should check - * the return of callout_async_drain() and if 0 - * increment tt_draincnt. Since the timer sub-system - * does not know your callbacks you must provide a - * stop_all function that loops through and calls - * tcp_timer_stop() with each of your defined timers. - * Adding a tfb_tcp_handoff_ok function allows the socket - * option to change stacks to query you even if the - * connection is in a later stage. 
You return 0 to - * say you can take over and run your stack, you return - * non-zero (an error number) to say no you can't. - * If the function is undefined you can only change - * in the early states (before connect or listen). - * tfb_tcp_fb_fini is changed to add a flag to tell - * the old stack if the tcb is being destroyed or - * not. A one in the flag means the TCB is being - * destroyed, a zero indicates its transitioning to - * another stack (via socket option). - */ -struct tcp_function_block { - char tfb_tcp_block_name[TCP_FUNCTION_NAME_LEN_MAX]; - int (*tfb_tcp_output)(struct tcpcb *); - void (*tfb_tcp_do_segment)(struct mbuf *, struct tcphdr *, - struct socket *, struct tcpcb *, - int, int, uint8_t, - int); - int (*tfb_tcp_ctloutput)(struct socket *so, struct sockopt *sopt, - struct inpcb *inp, struct tcpcb *tp); - /* Optional memory allocation/free routine */ - void (*tfb_tcp_fb_init)(struct tcpcb *); - void (*tfb_tcp_fb_fini)(struct tcpcb *, int); - /* Optional timers, must define all if you define one */ - int (*tfb_tcp_timer_stop_all)(struct tcpcb *); - void (*tfb_tcp_timer_activate)(struct tcpcb *, - uint32_t, u_int); - int (*tfb_tcp_timer_active)(struct tcpcb *, uint32_t); - void (*tfb_tcp_timer_stop)(struct tcpcb *, uint32_t); - void (*tfb_tcp_rexmit_tmr)(struct tcpcb *); - int (*tfb_tcp_handoff_ok)(struct tcpcb *); - volatile uint32_t tfb_refcnt; - uint32_t tfb_flags; -}; - -struct tcp_function { - TAILQ_ENTRY(tcp_function) tf_next; - struct tcp_function_block *tf_fb; -}; - -TAILQ_HEAD(tcp_funchead, tcp_function); - /* * Tcp control block, one per tcp; fields: * Organized for 16 byte cacheline efficiency. 
*/ struct tcpcb { struct tsegqe_head t_segq; /* segment reassembly queue */ - void *t_pspare[2]; /* new reassembly queue */ int t_segqlen; /* segment reassembly queue length */ int t_dupacks; /* consecutive dup acks recd */ @@ -197,12 +113,10 @@ struct tcpcb { uint32_t snd_wnd; /* send window */ uint32_t snd_cwnd; /* congestion-controlled window */ - u_long snd_spare1; /* unused */ uint32_t snd_ssthresh; /* snd_cwnd size threshold for * for slow start exponential to * linear switch */ - u_long snd_spare2; /* unused */ tcp_seq snd_recover; /* for use in NewReno Fast Recovery */ u_int t_rcvtime; /* inactivity time */ @@ -210,9 +124,6 @@ struct tcpcb { u_int t_rtttime; /* RTT measurement start time */ tcp_seq t_rtseq; /* sequence number being timed */ - u_int t_bw_spare1; /* unused */ - tcp_seq t_bw_spare2; /* unused */ - int t_rxtcur; /* current retransmit value (ticks) */ u_int t_maxseg; /* maximum segment size */ u_int t_pmtud_saved_maxseg; /* pre-blackhole MSS */ @@ -276,33 +187,98 @@ struct tcpcb { u_int t_tsomaxsegcount; /* TSO maximum segment count */ u_int t_tsomaxsegsize; /* TSO maximum segment size in bytes */ u_int t_flags2; /* More tcpcb flags storage */ -#if defined(_KERNEL) && defined(TCP_RFC7413) - uint32_t t_ispare[6]; /* 5 UTO, 1 TBD */ - uint64_t t_tfo_cookie; /* TCP Fast Open cookie */ -#else - uint32_t t_ispare[8]; /* 5 UTO, 3 TBD */ -#endif struct tcp_function_block *t_fb;/* TCP function call block */ void *t_fb_ptr; /* Pointer to t_fb specific data */ -#if defined(_KERNEL) && defined(TCP_RFC7413) +#ifdef TCP_RFC7413 + uint64_t t_tfo_cookie; /* TCP Fast Open cookie */ unsigned int *t_tfo_pending; /* TCP Fast Open pending counter */ - void *t_pspare2[1]; /* 1 TCP_SIGNATURE */ -#else - void *t_pspare2[2]; /* 1 TCP_SIGNATURE, 1 TBD */ #endif -#if defined(_KERNEL) && defined(TCPPCAP) +#ifdef TCPPCAP struct mbufq t_inpkts; /* List of saved input packets. */ struct mbufq t_outpkts; /* List of saved output packets. 
*/ -#ifdef _LP64 - uint64_t _pad[0]; /* all used! */ -#else - uint64_t _pad[2]; /* 2 are available */ -#endif /* _LP64 */ -#else - uint64_t _pad[6]; -#endif /* defined(_KERNEL) && defined(TCPPCAP) */ +#endif +}; +#endif /* _KERNEL || _WANT_TCPCB */ + +#ifdef _KERNEL +/* + * Kernel variables for tcp. + */ +VNET_DECLARE(int, tcp_do_rfc1323); +#define V_tcp_do_rfc1323 VNET(tcp_do_rfc1323) + +struct tcptemp { + u_char tt_ipgen[40]; /* the size must be of max ip header, now IPv6 */ + struct tcphdr tt_t; }; +/* + * TODO: We yet need to brave plowing in + * to tcp_input() and the pru_usrreq() block. + * Right now these go to the old standards which + * are somewhat ok, but in the long term may + * need to be changed. If we do tackle tcp_input() + * then we need to get rid of the tcp_do_segment() + * function below. + */ +/* Flags for tcp functions */ +#define TCP_FUNC_BEING_REMOVED 0x01 /* Can no longer be referenced */ + +/* + * If defining the optional tcp_timers, in the + * tfb_tcp_timer_stop call you must use the + * callout_async_drain() function with the + * tcp_timer_discard callback. You should check + * the return of callout_async_drain() and if 0 + * increment tt_draincnt. Since the timer sub-system + * does not know your callbacks you must provide a + * stop_all function that loops through and calls + * tcp_timer_stop() with each of your defined timers. + * Adding a tfb_tcp_handoff_ok function allows the socket + * option to change stacks to query you even if the + * connection is in a later stage. You return 0 to + * say you can take over and run your stack, you return + * non-zero (an error number) to say no you can't. + * If the function is undefined you can only change + * in the early states (before connect or listen). + * tfb_tcp_fb_fini is changed to add a flag to tell + * the old stack if the tcb is being destroyed or + * not. 
A one in the flag means the TCB is being + * destroyed, a zero indicates its transitioning to + * another stack (via socket option). + */ +struct tcp_function_block { + char tfb_tcp_block_name[TCP_FUNCTION_NAME_LEN_MAX]; + int (*tfb_tcp_output)(struct tcpcb *); + void (*tfb_tcp_do_segment)(struct mbuf *, struct tcphdr *, + struct socket *, struct tcpcb *, + int, int, uint8_t, + int); + int (*tfb_tcp_ctloutput)(struct socket *so, struct sockopt *sopt, + struct inpcb *inp, struct tcpcb *tp); + /* Optional memory allocation/free routine */ + void (*tfb_tcp_fb_init)(struct tcpcb *); + void (*tfb_tcp_fb_fini)(struct tcpcb *, int); + /* Optional timers, must define all if you define one */ + int (*tfb_tcp_timer_stop_all)(struct tcpcb *); + void (*tfb_tcp_timer_activate)(struct tcpcb *, + uint32_t, u_int); + int (*tfb_tcp_timer_active)(struct tcpcb *, uint32_t); + void (*tfb_tcp_timer_stop)(struct tcpcb *, uint32_t); + void (*tfb_tcp_rexmit_tmr)(struct tcpcb *); + int (*tfb_tcp_handoff_ok)(struct tcpcb *); + volatile uint32_t tfb_refcnt; + uint32_t tfb_flags; +}; + +struct tcp_function { + TAILQ_ENTRY(tcp_function) tf_next; + struct tcp_function_block *tf_fb; +}; + +TAILQ_HEAD(tcp_funchead, tcp_function); +#endif /* _KERNEL */ + /* * Flags and utility macros for the t_flags field. */ @@ -363,21 +339,6 @@ struct tcpcb { #define TCPOOB_HAVEDATA 0x01 #define TCPOOB_HADDATA 0x02 -#ifdef TCP_SIGNATURE -/* - * Defines which are needed by the xform_tcp module and tcp_[in|out]put - * for SADB verification and lookup. - */ -#define TCP_SIGLEN 16 /* length of computed digest in bytes */ -#define TCP_KEYLEN_MIN 1 /* minimum length of TCP-MD5 key */ -#define TCP_KEYLEN_MAX 80 /* maximum length of TCP-MD5 key */ -/* - * Only a single SA per host may be specified at this time. An SPI is - * needed in order for the KEY_ALLOCSA() lookup to work. 
- */ -#define TCP_SIG_SPI 0x1000 -#endif /* TCP_SIGNATURE */ - /* * Flags for PLPMTU handling, t_flags2 */ @@ -452,7 +413,7 @@ struct tcptw { tcp_seq iss; tcp_seq irs; u_short last_win; /* cached window value */ - u_short tw_so_options; /* copy of so_options */ + short tw_so_options; /* copy of so_options */ struct ucred *tw_cred; /* user credentials */ u_int32_t t_recent; u_int32_t ts_offset; /* our timestamp offset */ @@ -614,7 +575,7 @@ struct tcpstat { /* TCP_SIGNATURE related stats */ uint64_t tcps_sig_rcvgoodsig; /* Total matching signature received */ uint64_t tcps_sig_rcvbadsig; /* Total bad signature received */ - uint64_t tcps_sig_err_buildsig; /* Mismatching signature received */ + uint64_t tcps_sig_err_buildsig; /* Failed to make signature */ uint64_t tcps_sig_err_sigopt; /* No signature expected by socket */ uint64_t tcps_sig_err_nosigopt; /* No signature provided by segment */ @@ -671,26 +632,41 @@ struct tcp_hhook_data { /* * TCB structure exported to user-land via sysctl(3). + * + * Fields prefixed with "xt_" are unique to the export structure, and fields + * with "t_" or other prefixes match corresponding fields of 'struct tcpcb'. + * + * Legend: + * (s) - used by userland utilities in src + * (p) - used by utilities in ports + * (3) - is known to be used by third party software not in ports + * (n) - no known usage + * * Evil hack: declare only if in_pcb.h and sys/socketvar.h have been * included. Not all of our clients do. 
*/ #if defined(_NETINET_IN_PCB_H_) && defined(_SYS_SOCKETVAR_H_) -struct xtcp_timer { - int tt_rexmt; /* retransmit timer */ - int tt_persist; /* retransmit persistence */ - int tt_keep; /* keepalive */ - int tt_2msl; /* 2*msl TIME_WAIT timer */ - int tt_delack; /* delayed ACK timer */ - int t_rcvtime; /* Time since last packet received */ -}; -struct xtcpcb { - size_t xt_len; - struct inpcb xt_inp; - struct tcpcb xt_tp; - struct xsocket xt_socket; - struct xtcp_timer xt_timer; - u_quad_t xt_alignment_hack; -}; +struct xtcpcb { + size_t xt_len; /* length of this structure */ + struct xinpcb xt_inp; + char xt_stack[TCP_FUNCTION_NAME_LEN_MAX]; /* (n) */ + int64_t spare64[8]; + int32_t t_state; /* (s,p) */ + uint32_t t_flags; /* (s,p) */ + int32_t t_sndzerowin; /* (s) */ + int32_t t_sndrexmitpack; /* (s) */ + int32_t t_rcvoopack; /* (s) */ + int32_t t_rcvtime; /* (s) */ + int32_t tt_rexmt; /* (s) */ + int32_t tt_persist; /* (s) */ + int32_t tt_keep; /* (s) */ + int32_t tt_2msl; /* (s) */ + int32_t tt_delack; /* (s) */ + int32_t spare32[32]; +} __aligned(8); +#ifdef _KERNEL +void tcp_inptoxtp(const struct inpcb *, struct xtcpcb *); +#endif #endif /* @@ -835,17 +811,6 @@ void tcp_tw_zone_change(void); int tcp_twcheck(struct inpcb *, struct tcpopt *, struct tcphdr *, struct mbuf *, int); void tcp_setpersist(struct tcpcb *); -#ifdef TCP_SIGNATURE -struct secasvar; -struct secasvar *tcp_get_sav(struct mbuf *, u_int); -int tcp_signature_do_compute(struct mbuf *, int, int, u_char *, - struct secasvar *); -int tcp_signature_compute(struct mbuf *, int, int, int, u_char *, u_int); -int tcp_signature_verify(struct mbuf *, int, int, int, struct tcpopt *, - struct tcphdr *, u_int); -int tcp_signature_check(struct mbuf *m, int off0, int tlen, int optlen, - struct tcpopt *to, struct tcphdr *th, u_int tcpbflag); -#endif void tcp_slowtimo(void); struct tcptemp * tcpip_maketemplate(struct inpcb *); @@ -889,7 +854,6 @@ tcp_fields_to_host(struct tcphdr *th) th->th_urp = 
ntohs(th->th_urp); } -#ifdef TCP_SIGNATURE static inline void tcp_fields_to_net(struct tcphdr *th) { @@ -899,7 +863,6 @@ tcp_fields_to_net(struct tcphdr *th) th->th_win = htons(th->th_win); th->th_urp = htons(th->th_urp); } -#endif #endif /* _KERNEL */ #endif /* _NETINET_TCP_VAR_H_ */ diff --git a/freebsd/sys/netinet/tcpip.h b/freebsd/sys/netinet/tcpip.h index 3a89d5d5..45c1095a 100644 --- a/freebsd/sys/netinet/tcpip.h +++ b/freebsd/sys/netinet/tcpip.h @@ -10,7 +10,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/netinet/udp.h b/freebsd/sys/netinet/udp.h index c2d638dd..7b18df42 100644 --- a/freebsd/sys/netinet/udp.h +++ b/freebsd/sys/netinet/udp.h @@ -11,7 +11,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * @@ -58,7 +58,7 @@ struct udphdr { */ /* Encapsulation types. */ #define UDP_ENCAP_ESPINUDP_NON_IKE 1 /* draft-ietf-ipsec-nat-t-ike-00/01 */ -#define UDP_ENCAP_ESPINUDP 2 /* draft-ietf-ipsec-udp-encaps-02+ */ +#define UDP_ENCAP_ESPINUDP 2 /* RFC3948 */ /* Default ESP in UDP encapsulation port. 
*/ #define UDP_ENCAP_ESPINUDP_PORT 500 diff --git a/freebsd/sys/netinet/udp_usrreq.c b/freebsd/sys/netinet/udp_usrreq.c index 42461ce9..093b7f32 100644 --- a/freebsd/sys/netinet/udp_usrreq.c +++ b/freebsd/sys/netinet/udp_usrreq.c @@ -19,7 +19,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * @@ -98,10 +98,7 @@ __FBSDID("$FreeBSD$"); #include #include -#ifdef IPSEC -#include -#include -#endif +#include #include @@ -178,15 +175,6 @@ static int udp_output(struct inpcb *, struct mbuf *, struct sockaddr *, struct mbuf *, struct thread *); #endif -#ifdef IPSEC -#ifdef IPSEC_NAT_T -#define UF_ESPINUDP_ALL (UF_ESPINUDP_NON_IKE|UF_ESPINUDP) -#ifdef INET -static struct mbuf *udp4_espdecap(struct inpcb *, struct mbuf *, int); -#endif -#endif /* IPSEC_NAT_T */ -#endif /* IPSEC */ - static void udp_zone_change(void *tag) { @@ -322,7 +310,7 @@ udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off, { struct sockaddr *append_sa; struct socket *so; - struct mbuf *opts = NULL; + struct mbuf *tmpopts, *opts = NULL; #ifdef INET6 struct sockaddr_in6 udp_in6; #endif @@ -337,7 +325,7 @@ udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off, if (up->u_tun_func != NULL) { in_pcbref(inp); INP_RUNLOCK(inp); - (*up->u_tun_func)(n, off, inp, (struct sockaddr *)udp_in, + (*up->u_tun_func)(n, off, inp, (struct sockaddr *)&udp_in[0], up->u_tun_ctx); INP_RLOCK(inp); return (in_pcbrele_rlocked(inp)); @@ -345,21 +333,18 @@ udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off, off += sizeof(struct udphdr); -#ifdef IPSEC +#if 
defined(IPSEC) || defined(IPSEC_SUPPORT) /* Check AH/ESP integrity. */ - if (ipsec4_in_reject(n, inp)) { + if (IPSEC_ENABLED(ipv4) && + IPSEC_CHECK_POLICY(ipv4, n, inp) != 0) { m_freem(n); return (0); } -#ifdef IPSEC_NAT_T - up = intoudpcb(inp); - KASSERT(up != NULL, ("%s: udpcb NULL", __func__)); - if (up->u_flags & UF_ESPINUDP_ALL) { /* IPSec UDP encaps. */ - n = udp4_espdecap(inp, n, off); - if (n == NULL) /* Consumed. */ - return (0); + if (up->u_flags & UF_ESPINUDP) {/* IPSec UDP encaps. */ + if (IPSEC_ENABLED(ipv4) && + UDPENCAP_INPUT(n, off, AF_INET) != 0) + return (0); /* Consumed. */ } -#endif /* IPSEC_NAT_T */ #endif /* IPSEC */ #ifdef MAC if (mac_inpcb_check_deliver(inp, n) != 0) { @@ -376,16 +361,27 @@ udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off, #endif /* INET6 */ ip_savecontrol(inp, &opts, ip, n); } + if ((inp->inp_vflag & INP_IPV4) && (inp->inp_flags2 & INP_ORIGDSTADDR)) { + tmpopts = sbcreatecontrol((caddr_t)&udp_in[1], + sizeof(struct sockaddr_in), IP_ORIGDSTADDR, IPPROTO_IP); + if (tmpopts) { + if (opts) { + tmpopts->m_next = opts; + opts = tmpopts; + } else + opts = tmpopts; + } + } #ifdef INET6 if (inp->inp_vflag & INP_IPV6) { bzero(&udp_in6, sizeof(udp_in6)); udp_in6.sin6_len = sizeof(udp_in6); udp_in6.sin6_family = AF_INET6; - in6_sin_2_v4mapsin6(udp_in, &udp_in6); + in6_sin_2_v4mapsin6(&udp_in[0], &udp_in6); append_sa = (struct sockaddr *)&udp_in6; } else #endif /* INET6 */ - append_sa = (struct sockaddr *)udp_in; + append_sa = (struct sockaddr *)&udp_in[0]; m_adj(n, off); so = inp->inp_socket; @@ -411,7 +407,7 @@ udp_input(struct mbuf **mp, int *offp, int proto) uint16_t len, ip_len; struct inpcbinfo *pcbinfo; struct ip save_ip; - struct sockaddr_in udp_in; + struct sockaddr_in udp_in[2]; struct mbuf *m; struct m_tag *fwd_tag; int cscov_partial, iphlen; @@ -456,11 +452,15 @@ udp_input(struct mbuf **mp, int *offp, int proto) * Construct sockaddr format source address. 
Stuff source address * and datagram in user buffer. */ - bzero(&udp_in, sizeof(udp_in)); - udp_in.sin_len = sizeof(udp_in); - udp_in.sin_family = AF_INET; - udp_in.sin_port = uh->uh_sport; - udp_in.sin_addr = ip->ip_src; + bzero(&udp_in[0], sizeof(struct sockaddr_in) * 2); + udp_in[0].sin_len = sizeof(struct sockaddr_in); + udp_in[0].sin_family = AF_INET; + udp_in[0].sin_port = uh->uh_sport; + udp_in[0].sin_addr = ip->ip_src; + udp_in[1].sin_len = sizeof(struct sockaddr_in); + udp_in[1].sin_family = AF_INET; + udp_in[1].sin_port = uh->uh_dport; + udp_in[1].sin_addr = ip->ip_dst; /* * Make mbuf data length reflect UDP length. If not enough data to @@ -589,7 +589,7 @@ udp_input(struct mbuf **mp, int *offp, int proto) blocked = imo_multi_filter(imo, ifp, (struct sockaddr *)&group, - (struct sockaddr *)&udp_in); + (struct sockaddr *)&udp_in[0]); if (blocked != MCAST_PASS) { if (blocked == MCAST_NOTGMEMBER) IPSTAT_INC(ips_notmember); @@ -608,7 +608,7 @@ udp_input(struct mbuf **mp, int *offp, int proto) UDP_PROBE(receive, NULL, last, ip, last, uh); if (udp_append(last, ip, n, iphlen, - &udp_in)) { + udp_in)) { goto inp_lost; } } @@ -641,7 +641,7 @@ udp_input(struct mbuf **mp, int *offp, int proto) goto badunlocked; } UDP_PROBE(receive, NULL, last, ip, last, uh); - if (udp_append(last, ip, m, iphlen, &udp_in) == 0) + if (udp_append(last, ip, m, iphlen, udp_in) == 0) INP_RUNLOCK(last); inp_lost: INP_INFO_RUNLOCK(pcbinfo); @@ -688,13 +688,13 @@ udp_input(struct mbuf **mp, int *offp, int proto) INPLOOKUP_RLOCKPCB, ifp, m); if (inp == NULL) { if (udp_log_in_vain) { - char buf[4*sizeof "123"]; + char src[INET_ADDRSTRLEN]; + char dst[INET_ADDRSTRLEN]; - strcpy(buf, inet_ntoa(ip->ip_dst)); log(LOG_INFO, "Connection attempt to UDP %s:%d from %s:%d\n", - buf, ntohs(uh->uh_dport), inet_ntoa(ip->ip_src), - ntohs(uh->uh_sport)); + inet_ntoa_r(ip->ip_dst, dst), ntohs(uh->uh_dport), + inet_ntoa_r(ip->ip_src, src), ntohs(uh->uh_sport)); } UDPSTAT_INC(udps_noport); if (m->m_flags & 
(M_BCAST | M_MCAST)) { @@ -731,7 +731,7 @@ udp_input(struct mbuf **mp, int *offp, int proto) } UDP_PROBE(receive, NULL, inp, ip, inp, uh); - if (udp_append(inp, ip, m, iphlen, &udp_in) == 0) + if (udp_append(inp, ip, m, iphlen, udp_in) == 0) INP_RUNLOCK(inp); return (IPPROTO_DONE); @@ -911,13 +911,7 @@ udp_pcblist(SYSCTL_HANDLER_ARGS) if (inp->inp_gencnt <= gencnt) { struct xinpcb xi; - bzero(&xi, sizeof(xi)); - xi.xi_len = sizeof xi; - /* XXX should avoid extra copy */ - bcopy(inp, &xi.xi_inp, sizeof *inp); - if (inp->inp_socket) - sotoxsocket(inp->inp_socket, &xi.xi_socket); - xi.xi_inp.inp_gencnt = inp->inp_gencnt; + in_pcbtoxinpcb(inp, &xi); INP_RUNLOCK(inp); error = SYSCTL_OUT(req, &xi, sizeof xi); } else @@ -1027,42 +1021,17 @@ udp_ctloutput(struct socket *so, struct sockopt *sopt) switch (sopt->sopt_dir) { case SOPT_SET: switch (sopt->sopt_name) { +#if defined(IPSEC) || defined(IPSEC_SUPPORT) +#ifdef INET case UDP_ENCAP: - INP_WUNLOCK(inp); - error = sooptcopyin(sopt, &optval, sizeof optval, - sizeof optval); - if (error) - break; - inp = sotoinpcb(so); - KASSERT(inp != NULL, ("%s: inp == NULL", __func__)); - INP_WLOCK(inp); -#ifdef IPSEC_NAT_T - up = intoudpcb(inp); - KASSERT(up != NULL, ("%s: up == NULL", __func__)); -#endif - switch (optval) { - case 0: - /* Clear all UDP encap. 
*/ -#ifdef IPSEC_NAT_T - up->u_flags &= ~UF_ESPINUDP_ALL; -#endif - break; -#ifdef IPSEC_NAT_T - case UDP_ENCAP_ESPINUDP: - case UDP_ENCAP_ESPINUDP_NON_IKE: - up->u_flags &= ~UF_ESPINUDP_ALL; - if (optval == UDP_ENCAP_ESPINUDP) - up->u_flags |= UF_ESPINUDP; - else if (optval == UDP_ENCAP_ESPINUDP_NON_IKE) - up->u_flags |= UF_ESPINUDP_NON_IKE; - break; -#endif - default: - error = EINVAL; - break; + if (!IPSEC_ENABLED(ipv4)) { + INP_WUNLOCK(inp); + return (ENOPROTOOPT); } - INP_WUNLOCK(inp); + error = UDPENCAP_PCBCTL(inp, sopt); break; +#endif /* INET */ +#endif /* IPSEC */ case UDPLITE_SEND_CSCOV: case UDPLITE_RECV_CSCOV: if (!isudplite) { @@ -1099,15 +1068,17 @@ udp_ctloutput(struct socket *so, struct sockopt *sopt) break; case SOPT_GET: switch (sopt->sopt_name) { -#ifdef IPSEC_NAT_T +#if defined(IPSEC) || defined(IPSEC_SUPPORT) +#ifdef INET case UDP_ENCAP: - up = intoudpcb(inp); - KASSERT(up != NULL, ("%s: up == NULL", __func__)); - optval = up->u_flags & UF_ESPINUDP_ALL; - INP_WUNLOCK(inp); - error = sooptcopyout(sopt, &optval, sizeof optval); + if (!IPSEC_ENABLED(ipv4)) { + INP_WUNLOCK(inp); + return (ENOPROTOOPT); + } + error = UDPENCAP_PCBCTL(inp, sopt); break; -#endif +#endif /* INET */ +#endif /* IPSEC */ case UDPLITE_SEND_CSCOV: case UDPLITE_RECV_CSCOV: if (!isudplite) { @@ -1590,142 +1561,6 @@ release: return (error); } - -#if defined(IPSEC) && defined(IPSEC_NAT_T) -/* - * Potentially decap ESP in UDP frame. Check for an ESP header - * and optional marker; if present, strip the UDP header and - * push the result through IPSec. - * - * Returns mbuf to be processed (potentially re-allocated) or - * NULL if consumed and/or processed. 
- */ -static struct mbuf * -udp4_espdecap(struct inpcb *inp, struct mbuf *m, int off) -{ - size_t minlen, payload, skip, iphlen; - caddr_t data; - struct udpcb *up; - struct m_tag *tag; - struct udphdr *udphdr; - struct ip *ip; - - INP_RLOCK_ASSERT(inp); - - /* - * Pull up data so the longest case is contiguous: - * IP/UDP hdr + non ESP marker + ESP hdr. - */ - minlen = off + sizeof(uint64_t) + sizeof(struct esp); - if (minlen > m->m_pkthdr.len) - minlen = m->m_pkthdr.len; - if ((m = m_pullup(m, minlen)) == NULL) { - IPSECSTAT_INC(ips_in_inval); - return (NULL); /* Bypass caller processing. */ - } - data = mtod(m, caddr_t); /* Points to ip header. */ - payload = m->m_len - off; /* Size of payload. */ - - if (payload == 1 && data[off] == '\xff') - return (m); /* NB: keepalive packet, no decap. */ - - up = intoudpcb(inp); - KASSERT(up != NULL, ("%s: udpcb NULL", __func__)); - KASSERT((up->u_flags & UF_ESPINUDP_ALL) != 0, - ("u_flags 0x%x", up->u_flags)); - - /* - * Check that the payload is large enough to hold an - * ESP header and compute the amount of data to remove. - * - * NB: the caller has already done a pullup for us. - * XXX can we assume alignment and eliminate bcopys? - */ - if (up->u_flags & UF_ESPINUDP_NON_IKE) { - /* - * draft-ietf-ipsec-nat-t-ike-0[01].txt and - * draft-ietf-ipsec-udp-encaps-(00/)01.txt, ignoring - * possible AH mode non-IKE marker+non-ESP marker - * from draft-ietf-ipsec-udp-encaps-00.txt. - */ - uint64_t marker; - - if (payload <= sizeof(uint64_t) + sizeof(struct esp)) - return (m); /* NB: no decap. */ - bcopy(data + off, &marker, sizeof(uint64_t)); - if (marker != 0) /* Non-IKE marker. */ - return (m); /* NB: no decap. */ - skip = sizeof(uint64_t) + sizeof(struct udphdr); - } else { - uint32_t spi; - - if (payload <= sizeof(struct esp)) { - IPSECSTAT_INC(ips_in_inval); - m_freem(m); - return (NULL); /* Discard. */ - } - bcopy(data + off, &spi, sizeof(uint32_t)); - if (spi == 0) /* Non-ESP marker. */ - return (m); /* NB: no decap. 
*/ - skip = sizeof(struct udphdr); - } - - /* - * Setup a PACKET_TAG_IPSEC_NAT_T_PORT tag to remember - * the UDP ports. This is required if we want to select - * the right SPD for multiple hosts behind same NAT. - * - * NB: ports are maintained in network byte order everywhere - * in the NAT-T code. - */ - tag = m_tag_get(PACKET_TAG_IPSEC_NAT_T_PORTS, - 2 * sizeof(uint16_t), M_NOWAIT); - if (tag == NULL) { - IPSECSTAT_INC(ips_in_nomem); - m_freem(m); - return (NULL); /* Discard. */ - } - iphlen = off - sizeof(struct udphdr); - udphdr = (struct udphdr *)(data + iphlen); - ((uint16_t *)(tag + 1))[0] = udphdr->uh_sport; - ((uint16_t *)(tag + 1))[1] = udphdr->uh_dport; - m_tag_prepend(m, tag); - - /* - * Remove the UDP header (and possibly the non ESP marker) - * IP header length is iphlen - * Before: - * <--- off ---> - * +----+------+-----+ - * | IP | UDP | ESP | - * +----+------+-----+ - * <-skip-> - * After: - * +----+-----+ - * | IP | ESP | - * +----+-----+ - * <-skip-> - */ - ovbcopy(data, data + skip, iphlen); - m_adj(m, skip); - - ip = mtod(m, struct ip *); - ip->ip_len = htons(ntohs(ip->ip_len) - skip); - ip->ip_p = IPPROTO_ESP; - - /* - * We cannot yet update the cksums so clear any - * h/w cksum flags as they are no longer valid. - */ - if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) - m->m_pkthdr.csum_flags &= ~(CSUM_DATA_VALID|CSUM_PSEUDO_HDR); - - (void) ipsec_common_input(m, iphlen, offsetof(struct ip, ip_p), - AF_INET, ip->ip_p); - return (NULL); /* NB: consumed, bypass processing. */ -} -#endif /* defined(IPSEC) && defined(IPSEC_NAT_T) */ - static void udp_abort(struct socket *so) { diff --git a/freebsd/sys/netinet/udp_var.h b/freebsd/sys/netinet/udp_var.h index 172d969d..e92ac961 100644 --- a/freebsd/sys/netinet/udp_var.h +++ b/freebsd/sys/netinet/udp_var.h @@ -11,7 +11,7 @@ * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/netinet6/frag6.c b/freebsd/sys/netinet6/frag6.c index 4cbd3000..f0721a4c 100644 --- a/freebsd/sys/netinet6/frag6.c +++ b/freebsd/sys/netinet6/frag6.c @@ -530,6 +530,11 @@ insert: af6 = ip6af->ip6af_down; frag6_deq(ip6af); while (af6 != (struct ip6asfrag *)q6) { + m->m_pkthdr.csum_flags &= + IP6_REASS_MBUF(af6)->m_pkthdr.csum_flags; + m->m_pkthdr.csum_data += + IP6_REASS_MBUF(af6)->m_pkthdr.csum_data; + af6dwn = af6->ip6af_down; frag6_deq(af6); while (t->m_next) @@ -540,6 +545,10 @@ insert: af6 = af6dwn; } + while (m->m_pkthdr.csum_data & 0xffff0000) + m->m_pkthdr.csum_data = (m->m_pkthdr.csum_data & 0xffff) + + (m->m_pkthdr.csum_data >> 16); + /* adjust offset to point where the original next header starts */ offset = ip6af->ip6af_offset - sizeof(struct ip6_frag); free(ip6af, M_FTABLE); diff --git a/freebsd/sys/netinet6/icmp6.c b/freebsd/sys/netinet6/icmp6.c index 14ce2b3b..48066467 100644 --- a/freebsd/sys/netinet6/icmp6.c +++ b/freebsd/sys/netinet6/icmp6.c @@ -43,7 +43,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* @@ -2161,7 +2161,7 @@ icmp6_reflect(struct mbuf *m, size_t off) * source address of the erroneous packet. */ in6_splitscope(&ip6->ip6_src, &dst6, &scopeid); - error = in6_selectsrc_addr(RT_DEFAULT_FIB, &dst6, + error = in6_selectsrc_addr(M_GETFIB(m), &dst6, scopeid, NULL, &src6, &hlim); if (error) { @@ -2303,7 +2303,7 @@ icmp6_redirect_input(struct mbuf *m, int off) uint32_t scopeid; in6_splitscope(&reddst6, &kdst, &scopeid); - if (fib6_lookup_nh_basic(RT_DEFAULT_FIB, &kdst, scopeid, 0, 0,&nh6)==0){ + if (fib6_lookup_nh_basic(ifp->if_fib, &kdst, scopeid, 0, 0,&nh6)==0){ if ((nh6.nh_flags & NHF_GATEWAY) == 0) { nd6log((LOG_ERR, "ICMP6 redirect rejected; no route " diff --git a/freebsd/sys/netinet6/in6.c b/freebsd/sys/netinet6/in6.c index 8a4c3663..bc390e50 100644 --- a/freebsd/sys/netinet6/in6.c +++ b/freebsd/sys/netinet6/in6.c @@ -43,7 +43,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * @@ -163,6 +163,7 @@ in6_newaddrmsg(struct in6_ifaddr *ia, int cmd) struct sockaddr_dl gateway; struct sockaddr_in6 mask, addr; struct rtentry rt; + int fibnum; /* * initialize for rtmsg generation @@ -180,8 +181,9 @@ in6_newaddrmsg(struct in6_ifaddr *ia, int cmd) rt.rt_flags = RTF_HOST | RTF_STATIC; if (cmd == RTM_ADD) rt.rt_flags |= RTF_UP; - /* Announce arrival of local address to all FIBs. */ - rt_newaddrmsg(cmd, &ia->ia_ifa, 0, &rt); + fibnum = V_rt_add_addr_allfibs ? RT_ALL_FIBS : ia62ifa(ia)->ifa_ifp->if_fib; + /* Announce arrival of local address to this FIB. 
*/ + rt_newaddrmsg_fib(cmd, &ia->ia_ifa, 0, &rt, fibnum); } int @@ -558,8 +560,11 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, */ if ((error = in6_update_ifa(ifp, ifra, ia, 0)) != 0) goto out; - if (ia != NULL) + if (ia != NULL) { + if (ia->ia_ifa.ifa_carp) + (*carp_detach_p)(&ia->ia_ifa, true); ifa_free(&ia->ia_ifa); + } if ((ia = in6ifa_ifpwithaddr(ifp, &ifra->ifra_addr.sin6_addr)) == NULL) { /* @@ -626,7 +631,7 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, */ if ((error = nd6_prelist_add(&pr0, NULL, &pr)) != 0) { if (carp_attached) - (*carp_detach_p)(&ia->ia_ifa); + (*carp_detach_p)(&ia->ia_ifa, false); goto out; } } @@ -1247,7 +1252,7 @@ in6_purgeaddr(struct ifaddr *ifa) int plen, error; if (ifa->ifa_carp) - (*carp_detach_p)(ifa); + (*carp_detach_p)(ifa, false); /* * Remove the loopback route to the interface address. @@ -1963,7 +1968,6 @@ in6_if2idlen(struct ifnet *ifp) case IFT_ETHER: /* RFC2464 */ case IFT_PROPVIRTUAL: /* XXX: no RFC. treat it as ether */ case IFT_L2VLAN: /* ditto */ - case IFT_IEEE80211: /* ditto */ case IFT_BRIDGE: /* bridge(4) only does Ethernet-like links */ case IFT_INFINIBAND: return (64); @@ -2119,15 +2123,15 @@ in6_lltable_rtcheck(struct ifnet *ifp, uint32_t scopeid; int error; char ip6buf[INET6_ADDRSTRLEN]; + int fibnum; KASSERT(l3addr->sa_family == AF_INET6, ("sin_family %d", l3addr->sa_family)); - /* Our local addresses are always only installed on the default FIB. */ - sin6 = (const struct sockaddr_in6 *)l3addr; in6_splitscope(&sin6->sin6_addr, &dst, &scopeid); - error = fib6_lookup_nh_basic(RT_DEFAULT_FIB, &dst, scopeid, 0, 0, &nh6); + fibnum = V_rt_add_addr_allfibs ? 
RT_DEFAULT_FIB : ifp->if_fib; + error = fib6_lookup_nh_basic(fibnum, &dst, scopeid, 0, 0, &nh6); if (error != 0 || (nh6.nh_flags & NHF_GATEWAY) || nh6.nh_ifp != ifp) { struct ifaddr *ifa; /* diff --git a/freebsd/sys/netinet6/in6.h b/freebsd/sys/netinet6/in6.h index 62c5e0b0..ed26a7cd 100644 --- a/freebsd/sys/netinet6/in6.h +++ b/freebsd/sys/netinet6/in6.h @@ -41,7 +41,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * @@ -432,10 +432,7 @@ struct route_in6 { #define IPV6_BINDV6ONLY IPV6_V6ONLY #endif -#if 1 /* IPSEC */ #define IPV6_IPSEC_POLICY 28 /* struct; get/set security policy */ -#endif /* IPSEC */ - /* 29; unused; was IPV6_FAITH */ #if 1 /* IPV6FIREWALL */ #define IPV6_FW_ADD 30 /* add a firewall rule to chain */ @@ -500,6 +497,9 @@ struct route_in6 { #define IPV6_RECVFLOWID 70 /* bool; receive IP6 flowid/flowtype w/ datagram */ #define IPV6_RECVRSSBUCKETID 71 /* bool; receive IP6 RSS bucket id w/ datagram */ +#define IPV6_ORIGDSTADDR 72 /* bool: allow getting dstaddr /port info */ +#define IPV6_RECVORIGDSTADDR IPV6_ORIGDSTADDR + /* * The following option is private; do not use it from user applications. * It is deliberately defined to the same value as IP_MSFILTER. diff --git a/freebsd/sys/netinet6/in6_cksum.c b/freebsd/sys/netinet6/in6_cksum.c index 6eebdadc..14e5dd30 100644 --- a/freebsd/sys/netinet6/in6_cksum.c +++ b/freebsd/sys/netinet6/in6_cksum.c @@ -43,7 +43,7 @@ * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/netinet6/in6_fib.c b/freebsd/sys/netinet6/in6_fib.c index 824db1fc..3c94b0c5 100644 --- a/freebsd/sys/netinet6/in6_fib.c +++ b/freebsd/sys/netinet6/in6_fib.c @@ -12,7 +12,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/netinet6/in6_fib.h b/freebsd/sys/netinet6/in6_fib.h index 3d58cd22..53f35a84 100644 --- a/freebsd/sys/netinet6/in6_fib.h +++ b/freebsd/sys/netinet6/in6_fib.h @@ -10,7 +10,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* diff --git a/freebsd/sys/netinet6/in6_ifattach.c b/freebsd/sys/netinet6/in6_ifattach.c index 879336a4..88bd95d4 100644 --- a/freebsd/sys/netinet6/in6_ifattach.c +++ b/freebsd/sys/netinet6/in6_ifattach.c @@ -282,7 +282,6 @@ found: case IFT_ISO88025: case IFT_ATM: case IFT_IEEE1394: - case IFT_IEEE80211: /* IEEE802/EUI64 cases - what others? */ /* IEEE1394 uses 16byte length address starting with EUI64 */ if (addrlen > 8) diff --git a/freebsd/sys/netinet6/in6_pcb.c b/freebsd/sys/netinet6/in6_pcb.c index 95e376c7..960d8d1f 100644 --- a/freebsd/sys/netinet6/in6_pcb.c +++ b/freebsd/sys/netinet6/in6_pcb.c @@ -47,7 +47,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * @@ -1281,7 +1281,7 @@ in6_pcblookup_mbuf(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, } void -init_sin6(struct sockaddr_in6 *sin6, struct mbuf *m) +init_sin6(struct sockaddr_in6 *sin6, struct mbuf *m, int srcordst) { struct ip6_hdr *ip; @@ -1289,7 +1289,7 @@ init_sin6(struct sockaddr_in6 *sin6, struct mbuf *m) bzero(sin6, sizeof(*sin6)); sin6->sin6_len = sizeof(*sin6); sin6->sin6_family = AF_INET6; - sin6->sin6_addr = ip->ip6_src; + sin6->sin6_addr = srcordst ? ip->ip6_dst : ip->ip6_src; (void)sa6_recoverscope(sin6); /* XXX: should catch errors... */ diff --git a/freebsd/sys/netinet6/in6_pcb.h b/freebsd/sys/netinet6/in6_pcb.h index e758dace..f21230bc 100644 --- a/freebsd/sys/netinet6/in6_pcb.h +++ b/freebsd/sys/netinet6/in6_pcb.h @@ -41,7 +41,7 @@ * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * @@ -113,7 +113,7 @@ int in6_mapped_sockaddr(struct socket *so, struct sockaddr **nam); int in6_mapped_peeraddr(struct socket *so, struct sockaddr **nam); int in6_selecthlim(struct in6pcb *, struct ifnet *); int in6_pcbsetport(struct in6_addr *, struct inpcb *, struct ucred *); -void init_sin6(struct sockaddr_in6 *sin6, struct mbuf *m); +void init_sin6(struct sockaddr_in6 *sin6, struct mbuf *m, int); #endif /* _KERNEL */ #endif /* !_NETINET6_IN6_PCB_H_ */ diff --git a/freebsd/sys/netinet6/in6_proto.c b/freebsd/sys/netinet6/in6_proto.c index 8a9c1cd9..03bbbeac 100644 --- a/freebsd/sys/netinet6/in6_proto.c +++ b/freebsd/sys/netinet6/in6_proto.c @@ -43,7 +43,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* @@ -123,11 +123,6 @@ __FBSDID("$FreeBSD$"); #include #endif /* SCTP */ -#ifdef IPSEC -#include -#include -#endif /* IPSEC */ - #include /* @@ -192,7 +187,7 @@ struct protosw inet6sw[] = { .pr_type = SOCK_SEQPACKET, .pr_domain = &inet6domain, .pr_protocol = IPPROTO_SCTP, - .pr_flags = PR_WANTRCVD, + .pr_flags = PR_WANTRCVD|PR_LASTHDR, .pr_input = sctp6_input, .pr_ctlinput = sctp6_ctlinput, .pr_ctloutput = sctp_ctloutput, @@ -206,7 +201,7 @@ struct protosw inet6sw[] = { .pr_type = SOCK_STREAM, .pr_domain = &inet6domain, .pr_protocol = IPPROTO_SCTP, - .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD, + .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_LASTHDR, .pr_input = sctp6_input, .pr_ctlinput = sctp6_ctlinput, .pr_ctloutput = sctp_ctloutput, @@ -278,33 +273,6 @@ struct protosw inet6sw[] = { .pr_input = frag6_input, .pr_usrreqs = &nousrreqs }, -#ifdef IPSEC -{ - .pr_type = SOCK_RAW, - .pr_domain = &inet6domain, - .pr_protocol = IPPROTO_AH, - .pr_flags = PR_ATOMIC|PR_ADDR, - .pr_input = ipsec6_common_input, - .pr_usrreqs = &nousrreqs, -}, -{ - .pr_type = SOCK_RAW, - .pr_domain = &inet6domain, - .pr_protocol = IPPROTO_ESP, - .pr_flags = PR_ATOMIC|PR_ADDR, - .pr_input = ipsec6_common_input, - .pr_ctlinput = esp6_ctlinput, - .pr_usrreqs = &nousrreqs, -}, -{ - .pr_type = SOCK_RAW, - .pr_domain = &inet6domain, - .pr_protocol = IPPROTO_IPCOMP, - .pr_flags = PR_ATOMIC|PR_ADDR, - .pr_input = ipsec6_common_input, - .pr_usrreqs = &nousrreqs, -}, -#endif /* IPSEC */ #ifdef INET { .pr_type = SOCK_RAW, @@ -472,7 +440,7 @@ SYSCTL_NODE(_net_inet6, IPPROTO_TCP, tcp6, CTLFLAG_RW, 0, "TCP6"); #ifdef SCTP SYSCTL_NODE(_net_inet6, IPPROTO_SCTP, sctp6, CTLFLAG_RW, 0, "SCTP6"); #endif -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) SYSCTL_NODE(_net_inet6, IPPROTO_ESP, ipsec6, CTLFLAG_RW, 0, "IPSEC6"); #endif /* IPSEC */ @@ -509,19 +477,21 @@ sysctl_ip6_tempvltime(SYSCTL_HANDLER_ARGS) SYSCTL_INT(_net_inet6_ip6, IPV6CTL_FORWARDING, forwarding, CTLFLAG_VNET | CTLFLAG_RW, 
&VNET_NAME(ip6_forwarding), 0, - "Enable IPv6 forwarding between interfaces"); + "Enable forwarding of IPv6 packets between interfaces"); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_SENDREDIRECTS, redirect, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_sendredirects), 0, - "Send a redirect message when forwarding back to a source link"); + "Send ICMPv6 redirects for unforwardable IPv6 packets"); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_DEFHLIM, hlim, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_defhlim), 0, - "Default hop limit"); + "Default hop limit to use for outgoing IPv6 packets"); SYSCTL_VNET_PCPUSTAT(_net_inet6_ip6, IPV6CTL_STATS, stats, struct ip6stat, ip6stat, "IP6 statistics (struct ip6stat, netinet6/ip6_var.h)"); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGPACKETS, maxfragpackets, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_maxfragpackets), 0, - "Maximum allowed number of outstanding fragmented IPv6 packets"); + "Default maximum number of outstanding fragmented IPv6 packets. " + "A value of 0 means no fragmented packets will be accepted, while a " + "a value of -1 means no limit"); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_ACCEPT_RTADV, accept_rtadv, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_accept_rtadv), 0, "Default value of per-interface flag for accepting ICMPv6 RA messages"); @@ -543,7 +513,8 @@ SYSCTL_INT(_net_inet6_ip6, IPV6CTL_LOG_INTERVAL, log_interval, "Frequency in seconds at which to log IPv6 forwarding errors"); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_HDRNESTLIMIT, hdrnestlimit, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_hdrnestlimit), 0, - "Maximum allowed number of nested protocol headers"); + "Default maximum number of IPv6 extension headers permitted on " + "incoming IPv6 packets, 0 for no artificial limit"); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_DAD_COUNT, dad_count, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_dad_count), 0, "Number of ICMPv6 NS messages sent during duplicate address detection"); @@ -552,7 +523,8 @@ SYSCTL_INT(_net_inet6_ip6, IPV6CTL_AUTO_FLOWLABEL, 
auto_flowlabel, "Provide an IPv6 flowlabel in outbound packets"); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_DEFMCASTHLIM, defmcasthlim, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_defmcasthlim), 0, - "Default hop limit for multicast packets"); + "Default hop limit for IPv6 multicast packets originating from this " + "node"); SYSCTL_STRING(_net_inet6_ip6, IPV6CTL_KAME_VERSION, kame_version, CTLFLAG_RD, __KAME_VERSION, 0, "KAME version string"); diff --git a/freebsd/sys/netinet6/in6_src.c b/freebsd/sys/netinet6/in6_src.c index 2a50a975..cae96274 100644 --- a/freebsd/sys/netinet6/in6_src.c +++ b/freebsd/sys/netinet6/in6_src.c @@ -43,7 +43,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * @@ -299,7 +299,7 @@ in6_selectsrc(uint32_t fibnum, struct sockaddr_in6 *dstsock, */ /* get the outgoing interface */ if ((error = in6_selectif(dstsock, opts, mopts, &ifp, oifp, - (inp != NULL) ? inp->inp_inc.inc_fibnum : RT_DEFAULT_FIB)) != 0) + (inp != NULL) ? inp->inp_inc.inc_fibnum : fibnum)) != 0) return (error); #ifdef DIAGNOSTIC @@ -565,7 +565,7 @@ in6_selectsrc_socket(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, uint32_t fibnum; int error; - fibnum = (inp != NULL) ? inp->inp_inc.inc_fibnum : RT_DEFAULT_FIB; + fibnum = inp->inp_inc.inc_fibnum; retifp = NULL; error = in6_selectsrc(fibnum, dstsock, opts, inp, cred, &retifp, srcp); diff --git a/freebsd/sys/netinet6/in6_var.h b/freebsd/sys/netinet6/in6_var.h index d7c10384..a3dac7f4 100644 --- a/freebsd/sys/netinet6/in6_var.h +++ b/freebsd/sys/netinet6/in6_var.h @@ -41,7 +41,7 @@ * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/netinet6/ip6_forward.c b/freebsd/sys/netinet6/ip6_forward.c index b57d31fa..f3e6c11c 100644 --- a/freebsd/sys/netinet6/ip6_forward.c +++ b/freebsd/sys/netinet6/ip6_forward.c @@ -71,12 +71,7 @@ __FBSDID("$FreeBSD$"); #include -#ifdef IPSEC -#include -#include -#include -#include -#endif /* IPSEC */ +#include /* * Forward a packet. If some error occurs return the sender @@ -102,9 +97,6 @@ ip6_forward(struct mbuf *m, int srcrt) struct ifnet *origifp; /* maybe unnecessary */ u_int32_t inzone, outzone; struct in6_addr src_in6, dst_in6, odst; -#ifdef IPSEC - struct secpolicy *sp = NULL; -#endif struct m_tag *fwd_tag; char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN]; @@ -132,32 +124,17 @@ ip6_forward(struct mbuf *m, int srcrt) m_freem(m); return; } -#ifdef IPSEC - /* - * Check if this packet has an active SA and needs to be dropped - * instead of forwarded. 
- */ - if (ip6_ipsec_fwd(m) != 0) { - IP6STAT_INC(ip6s_cantforward); - m_freem(m); - return; - } -#endif /* IPSEC */ + if ( #ifdef IPSTEALTH - if (!V_ip6stealth) { + V_ip6stealth == 0 && #endif - if (ip6->ip6_hlim <= IPV6_HLIMDEC) { + ip6->ip6_hlim <= IPV6_HLIMDEC) { /* XXX in6_ifstat_inc(rt->rt_ifp, ifs6_in_discard) */ icmp6_error(m, ICMP6_TIME_EXCEEDED, - ICMP6_TIME_EXCEED_TRANSIT, 0); + ICMP6_TIME_EXCEED_TRANSIT, 0); return; } - ip6->ip6_hlim -= IPV6_HLIMDEC; - -#ifdef IPSTEALTH - } -#endif /* * Save at most ICMPV6_PLD_MAXLEN (= the min IPv6 MTU - @@ -170,167 +147,22 @@ ip6_forward(struct mbuf *m, int srcrt) */ mcopy = m_copym(m, 0, imin(m->m_pkthdr.len, ICMPV6_PLD_MAXLEN), M_NOWAIT); - -#ifdef IPSEC - /* get a security policy for this packet */ - sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, &error); - if (sp == NULL) { - IPSEC6STAT_INC(ips_out_inval); - IP6STAT_INC(ip6s_cantforward); - if (mcopy) { -#if 0 - /* XXX: what icmp ? */ -#else - m_freem(mcopy); +#ifdef IPSTEALTH + if (V_ip6stealth == 0) #endif - } - m_freem(m); - return; - } - - error = 0; + ip6->ip6_hlim -= IPV6_HLIMDEC; - /* check policy */ - switch (sp->policy) { - case IPSEC_POLICY_DISCARD: - /* - * This packet is just discarded. - */ - IPSEC6STAT_INC(ips_out_polvio); - IP6STAT_INC(ip6s_cantforward); - KEY_FREESP(&sp); - if (mcopy) { -#if 0 - /* XXX: what icmp ? */ -#else +#if defined(IPSEC) || defined(IPSEC_SUPPORT) + if (IPSEC_ENABLED(ipv6)) { + if ((error = IPSEC_FORWARD(ipv6, m)) != 0) { + /* mbuf consumed by IPsec */ m_freem(mcopy); -#endif - } - m_freem(m); - return; - - case IPSEC_POLICY_BYPASS: - case IPSEC_POLICY_NONE: - /* no need to do IPsec. */ - KEY_FREESP(&sp); - goto skip_ipsec; - - case IPSEC_POLICY_IPSEC: - if (sp->req == NULL) { - /* XXX should be panic ? */ - printf("ip6_forward: No IPsec request specified.\n"); - IP6STAT_INC(ip6s_cantforward); - KEY_FREESP(&sp); - if (mcopy) { -#if 0 - /* XXX: what icmp ? 
*/ -#else - m_freem(mcopy); -#endif - } - m_freem(m); + if (error != EINPROGRESS) + IP6STAT_INC(ip6s_cantforward); return; } - /* do IPsec */ - break; - - case IPSEC_POLICY_ENTRUST: - default: - /* should be panic ?? */ - printf("ip6_forward: Invalid policy found. %d\n", sp->policy); - KEY_FREESP(&sp); - goto skip_ipsec; + /* No IPsec processing required */ } - - { - struct ipsecrequest *isr = NULL; - - /* - * when the kernel forwards a packet, it is not proper to apply - * IPsec transport mode to the packet. This check avoid from this. - * at present, if there is even a transport mode SA request in the - * security policy, the kernel does not apply IPsec to the packet. - * this check is not enough because the following case is valid. - * ipsec esp/tunnel/xxx-xxx/require esp/transport//require; - */ - for (isr = sp->req; isr; isr = isr->next) { - if (isr->saidx.mode == IPSEC_MODE_ANY) - goto doipsectunnel; - if (isr->saidx.mode == IPSEC_MODE_TUNNEL) - goto doipsectunnel; - } - - /* - * if there's no need for tunnel mode IPsec, skip. - */ - if (!isr) - goto skip_ipsec; - - doipsectunnel: - /* - * All the extension headers will become inaccessible - * (since they can be encrypted). - * Don't panic, we need no more updates to extension headers - * on inner IPv6 packet (since they are now encapsulated). - * - * IPv6 [ESP|AH] IPv6 [extension headers] payload - */ - - /* - * If we need to encapsulate the packet, do it here - * ipsec6_proces_packet will send the packet using ip6_output - */ - error = ipsec6_process_packet(m, sp->req); - /* Release SP if an error occurred */ - if (error != 0) - KEY_FREESP(&sp); - if (error == EJUSTRETURN) { - /* - * We had a SP with a level of 'use' and no SA. We - * will just continue to process the packet without - * IPsec processing. - */ - error = 0; - goto skip_ipsec; - } - - if (error) { - /* mbuf is already reclaimed in ipsec6_process_packet. 
*/ - switch (error) { - case EHOSTUNREACH: - case ENETUNREACH: - case EMSGSIZE: - case ENOBUFS: - case ENOMEM: - break; - default: - printf("ip6_output (ipsec): error code %d\n", error); - /* FALLTHROUGH */ - case ENOENT: - /* don't show these error codes to the user */ - break; - } - IP6STAT_INC(ip6s_cantforward); - if (mcopy) { -#if 0 - /* XXX: what icmp ? */ -#else - m_freem(mcopy); -#endif - } - return; - } else { - /* - * In the FAST IPSec case we have already - * re-injected the packet and it has been freed - * by the ipsec_done() function. So, just clean - * up after ourselves. - */ - m = NULL; - goto freecopy; - } - } -skip_ipsec: #endif again: bzero(&rin6, sizeof(struct route_in6)); @@ -542,34 +374,9 @@ pass: /* See if the size was changed by the packet filter. */ if (m->m_pkthdr.len > IN6_LINKMTU(rt->rt_ifp)) { in6_ifstat_inc(rt->rt_ifp, ifs6_in_toobig); - if (mcopy) { - u_long mtu; -#ifdef IPSEC - size_t ipsechdrsiz; -#endif /* IPSEC */ - - mtu = IN6_LINKMTU(rt->rt_ifp); -#ifdef IPSEC - /* - * When we do IPsec tunnel ingress, we need to play - * with the link value (decrement IPsec header size - * from mtu value). The code is much simpler than v4 - * case, as we have the outgoing interface for - * encapsulated packet as "rt->rt_ifp". - */ - ipsechdrsiz = ipsec_hdrsiz(mcopy, IPSEC_DIR_OUTBOUND, - NULL); - if (ipsechdrsiz < mtu) - mtu -= ipsechdrsiz; - /* - * if mtu becomes less than minimum MTU, - * tell minimum MTU (and I'll need to fragment it). - */ - if (mtu < IPV6_MMTU) - mtu = IPV6_MMTU; -#endif /* IPSEC */ - icmp6_error(mcopy, ICMP6_PACKET_TOO_BIG, 0, mtu); - } + if (mcopy) + icmp6_error(mcopy, ICMP6_PACKET_TOO_BIG, 0, + IN6_LINKMTU(rt->rt_ifp)); goto bad; } diff --git a/freebsd/sys/netinet6/ip6_input.c b/freebsd/sys/netinet6/ip6_input.c index e34ac0e6..5ef0e29a 100644 --- a/freebsd/sys/netinet6/ip6_input.c +++ b/freebsd/sys/netinet6/ip6_input.c @@ -43,7 +43,7 @@ * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * @@ -120,12 +120,7 @@ __FBSDID("$FreeBSD$"); #include #include -#ifdef IPSEC -#include -#include -#include -#include -#endif /* IPSEC */ +#include #include @@ -527,14 +522,11 @@ ip6_direct_input(struct mbuf *m) goto bad; } -#ifdef IPSEC - /* - * enforce IPsec policy checking if we are seeing last header. - * note that we do not visit this with protocols with pcb layer - * code - like udp/tcp/raw ip. - */ - if (ip6_ipsec_input(m, nxt)) - goto bad; +#if defined(IPSEC) || defined(IPSEC_SUPPORT) + if (IPSEC_ENABLED(ipv6)) { + if (IPSEC_INPUT(ipv6, m, off, nxt) != 0) + return; + } #endif /* IPSEC */ nxt = (*inet6sw[ip6_protox[nxt]].pr_input)(&m, &off, nxt); @@ -565,7 +557,7 @@ ip6_input(struct mbuf *m) if ((ND_IFINFO(rcvif)->flags & ND6_IFF_IFDISABLED)) goto bad; -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) /* * should the inner packet be considered authentic? * see comment in ah4_input(). @@ -737,9 +729,9 @@ ip6_input(struct mbuf *m) * ip6 pointer. */ if (V_ip6_forwarding != 0 -#ifdef IPSEC - && !key_havesp(IPSEC_DIR_INBOUND) - && !key_havesp(IPSEC_DIR_OUTBOUND) +#if defined(IPSEC) || defined(IPSEC_SUPPORT) + && (!IPSEC_ENABLED(ipv6) || + IPSEC_CAPS(ipv6, m, IPSEC_CAP_OPERABLE) == 0) #endif ) { if ((m = ip6_tryforward(m)) == NULL) @@ -751,12 +743,13 @@ ip6_input(struct mbuf *m) goto hbhcheck; } } -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) /* * Bypass packet filtering for packets previously handled by IPsec. 
*/ - if (ip6_ipsec_filtertunnel(m)) - goto passin; + if (IPSEC_ENABLED(ipv6) && + IPSEC_CAPS(ipv6, m, IPSEC_CAP_BYPASS_FILTER) != 0) + goto passin; #endif /* * Run through list of hooks for input packets. @@ -964,14 +957,11 @@ passin: goto bad; } -#ifdef IPSEC - /* - * enforce IPsec policy checking if we are seeing last header. - * note that we do not visit this with protocols with pcb layer - * code - like udp/tcp/raw ip. - */ - if (ip6_ipsec_input(m, nxt)) - goto bad; +#if defined(IPSEC) || defined(IPSEC_SUPPORT) + if (IPSEC_ENABLED(ipv6)) { + if (IPSEC_INPUT(ipv6, m, off, nxt) != 0) + return; + } #endif /* IPSEC */ nxt = (*inet6sw[ip6_protox[nxt]].pr_input)(&m, &off, nxt); @@ -1228,13 +1218,48 @@ ip6_savecontrol_v4(struct inpcb *inp, struct mbuf *m, struct mbuf **mp, #ifdef SO_TIMESTAMP if ((inp->inp_socket->so_options & SO_TIMESTAMP) != 0) { - struct timeval tv; + union { + struct timeval tv; + struct bintime bt; + struct timespec ts; + } t; + + switch (inp->inp_socket->so_ts_clock) { + case SO_TS_REALTIME_MICRO: + microtime(&t.tv); + *mp = sbcreatecontrol((caddr_t) &t.tv, sizeof(t.tv), + SCM_TIMESTAMP, SOL_SOCKET); + if (*mp) + mp = &(*mp)->m_next; + break; - microtime(&tv); - *mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv), - SCM_TIMESTAMP, SOL_SOCKET); - if (*mp) - mp = &(*mp)->m_next; + case SO_TS_BINTIME: + bintime(&t.bt); + *mp = sbcreatecontrol((caddr_t)&t.bt, sizeof(t.bt), + SCM_BINTIME, SOL_SOCKET); + if (*mp) + mp = &(*mp)->m_next; + break; + + case SO_TS_REALTIME: + nanotime(&t.ts); + *mp = sbcreatecontrol((caddr_t)&t.ts, sizeof(t.ts), + SCM_REALTIME, SOL_SOCKET); + if (*mp) + mp = &(*mp)->m_next; + break; + + case SO_TS_MONOTONIC: + nanouptime(&t.ts); + *mp = sbcreatecontrol((caddr_t)&t.ts, sizeof(t.ts), + SCM_MONOTONIC, SOL_SOCKET); + if (*mp) + mp = &(*mp)->m_next; + break; + + default: + panic("unknown (corrupted) so_ts_clock"); + } } #endif diff --git a/freebsd/sys/netinet6/ip6_ipsec.c b/freebsd/sys/netinet6/ip6_ipsec.c deleted file mode 100644 
index fe61dab9..00000000 --- a/freebsd/sys/netinet6/ip6_ipsec.c +++ /dev/null @@ -1,393 +0,0 @@ -#include - -/*- - * Copyright (c) 1982, 1986, 1988, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#include -__FBSDID("$FreeBSD$"); - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#ifdef IPSEC -#include -#include -#include -#include -#ifdef IPSEC_DEBUG -#include -#else -#define KEYDEBUG(lev,arg) -#endif -#endif /*IPSEC*/ - -#include -#include - -extern struct protosw inet6sw[]; - - -#ifdef INET6 -#ifdef IPSEC -#ifdef IPSEC_FILTERTUNNEL -static VNET_DEFINE(int, ip6_ipsec6_filtertunnel) = 1; -#else -static VNET_DEFINE(int, ip6_ipsec6_filtertunnel) = 0; -#endif -#define V_ip6_ipsec6_filtertunnel VNET(ip6_ipsec6_filtertunnel) - -SYSCTL_DECL(_net_inet6_ipsec6); -SYSCTL_VNET_INT(_net_inet6_ipsec6, OID_AUTO, - filtertunnel, CTLFLAG_RW, &VNET_NAME(ip6_ipsec6_filtertunnel), 0, - "If set filter packets from an IPsec tunnel."); -#endif /* IPSEC */ -#endif /* INET6 */ - -/* - * Check if we have to jump over firewall processing for this packet. - * Called from ip6_input(). - * 1 = jump over firewall, 0 = packet goes through firewall. - */ -int -ip6_ipsec_filtertunnel(struct mbuf *m) -{ -#if defined(IPSEC) - - /* - * Bypass packet filtering for packets previously handled by IPsec. - */ - if (!V_ip6_ipsec6_filtertunnel && - m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL) - return 1; -#endif - return 0; -} - -/* - * Check if this packet has an active SA and needs to be dropped instead - * of forwarded. - * Called from ip6_input(). - * 1 = drop packet, 0 = forward packet. 
- */ -int -ip6_ipsec_fwd(struct mbuf *m) -{ -#ifdef IPSEC - struct m_tag *mtag; - struct tdb_ident *tdbi; - struct secpolicy *sp; - int s, error; - mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL); - s = splnet(); - if (mtag != NULL) { - tdbi = (struct tdb_ident *)(mtag + 1); - sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND); - } else { - sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND, - IP_FORWARDING, &error); - } - if (sp == NULL) { /* NB: can happen if error */ - splx(s); - /*XXX error stat???*/ - DPRINTF(("%s: no SP for forwarding\n", __func__)); /*XXX*/ - return 1; - } - - /* - * Check security policy against packet attributes. - */ - error = ipsec_in_reject(sp, m); - KEY_FREESP(&sp); - splx(s); - if (error) { - IP6STAT_INC(ip6s_cantforward); - return 1; - } -#endif /* IPSEC */ - return 0; -} - -/* - * Check if protocol type doesn't have a further header and do IPSEC - * decryption or reject right now. Protocols with further headers get - * their IPSEC treatment within the protocol specific processing. - * Called from ip6_input(). - * 1 = drop packet, 0 = continue processing packet. - */ -int -ip6_ipsec_input(struct mbuf *m, int nxt) -{ -#ifdef IPSEC - struct m_tag *mtag; - struct tdb_ident *tdbi; - struct secpolicy *sp; - int s, error; - /* - * enforce IPsec policy checking if we are seeing last header. - * note that we do not visit this with protocols with pcb layer - * code - like udp/tcp/raw ip. - */ - if ((inet6sw[ip6_protox[nxt]].pr_flags & PR_LASTHDR) != 0 && - ipsec6_in_reject(m, NULL)) { - - /* - * Check if the packet has already had IPsec processing - * done. If so, then just pass it along. This tag gets - * set during AH, ESP, etc. input handling, before the - * packet is returned to the ip input queue for delivery. 
- */ - mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL); - s = splnet(); - if (mtag != NULL) { - tdbi = (struct tdb_ident *)(mtag + 1); - sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND); - } else { - sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND, - IP_FORWARDING, &error); - } - if (sp != NULL) { - /* - * Check security policy against packet attributes. - */ - error = ipsec_in_reject(sp, m); - KEY_FREESP(&sp); - } else { - /* XXX error stat??? */ - error = EINVAL; - DPRINTF(("%s: no SP, packet discarded\n", __func__));/*XXX*/ - return 1; - } - splx(s); - if (error) - return 1; - } -#endif /* IPSEC */ - return 0; -} - -/* - * Called from ip6_output(). - * 1 = drop packet, 0 = continue processing packet, - * -1 = packet was reinjected and stop processing packet - */ - -int -ip6_ipsec_output(struct mbuf **m, struct inpcb *inp, int *flags, int *error, - struct ifnet **ifp, struct secpolicy **sp) -{ -#ifdef IPSEC - struct tdb_ident *tdbi; - struct m_tag *mtag; - /* XXX int s; */ - if (sp == NULL) - return 1; - mtag = m_tag_find(*m, PACKET_TAG_IPSEC_PENDING_TDB, NULL); - if (mtag != NULL) { - tdbi = (struct tdb_ident *)(mtag + 1); - *sp = ipsec_getpolicy(tdbi, IPSEC_DIR_OUTBOUND); - if (*sp == NULL) - *error = -EINVAL; /* force silent drop */ - m_tag_delete(*m, mtag); - } else { - *sp = ipsec4_checkpolicy(*m, IPSEC_DIR_OUTBOUND, *flags, - error, inp); - } - - /* - * There are four return cases: - * sp != NULL apply IPsec policy - * sp == NULL, error == 0 no IPsec handling needed - * sp == NULL, error == -EINVAL discard packet w/o error - * sp == NULL, error != 0 discard packet, report error - */ - if (*sp != NULL) { - /* Loop detection, check if ipsec processing already done */ - KASSERT((*sp)->req != NULL, ("ip_output: no ipsec request")); - for (mtag = m_tag_first(*m); mtag != NULL; - mtag = m_tag_next(*m, mtag)) { - if (mtag->m_tag_cookie != MTAG_ABI_COMPAT) - continue; - if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE && - mtag->m_tag_id != 
PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED) - continue; - /* - * Check if policy has no SA associated with it. - * This can happen when an SP has yet to acquire - * an SA; e.g. on first reference. If it occurs, - * then we let ipsec4_process_packet do its thing. - */ - if ((*sp)->req->sav == NULL) - break; - tdbi = (struct tdb_ident *)(mtag + 1); - if (tdbi->spi == (*sp)->req->sav->spi && - tdbi->proto == (*sp)->req->sav->sah->saidx.proto && - bcmp(&tdbi->dst, &(*sp)->req->sav->sah->saidx.dst, - sizeof (union sockaddr_union)) == 0) { - /* - * No IPsec processing is needed, free - * reference to SP. - * - * NB: null pointer to avoid free at - * done: below. - */ - KEY_FREESP(sp), *sp = NULL; - /* XXX splx(s); */ - goto done; - } - } - - /* - * Do delayed checksums now because we send before - * this is done in the normal processing path. - * For IPv6 we do delayed checksums in ip6_output.c. - */ -#ifdef INET - if ((*m)->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { - ipseclog((LOG_DEBUG, - "%s: we do not support IPv4 over IPv6", __func__)); - in_delayed_cksum(*m); - (*m)->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; - } -#endif - - /* - * Preserve KAME behaviour: ENOENT can be returned - * when an SA acquire is in progress. Don't propagate - * this to user-level; it confuses applications. - * - * XXX this will go away when the SADB is redone. - */ - if (*error == ENOENT) - *error = 0; - goto do_ipsec; - } else { /* sp == NULL */ - if (*error != 0) { - /* - * Hack: -EINVAL is used to signal that a packet - * should be silently discarded. This is typically - * because we asked key management for an SA and - * it was delayed (e.g. kicked up to IKE). - */ - if (*error == -EINVAL) - *error = 0; - goto bad; - } else { - /* No IPsec processing for this packet. */ - } - } -done: - return 0; -do_ipsec: - return -1; -bad: - return 1; -#endif /* IPSEC */ - return 0; -} - -#if 0 -/* - * Compute the MTU for a forwarded packet that gets IPSEC encapsulated. - * Called from ip_forward(). 
- * Returns MTU suggestion for ICMP needfrag reply. - */ -int -ip6_ipsec_mtu(struct mbuf *m) -{ - int mtu = 0; - /* - * If the packet is routed over IPsec tunnel, tell the - * originator the tunnel MTU. - * tunnel MTU = if MTU - sizeof(IP) - ESP/AH hdrsiz - * XXX quickhack!!! - */ -#ifdef IPSEC - struct secpolicy *sp = NULL; - int ipsecerror; - int ipsechdr; - struct route *ro; - sp = ipsec_getpolicybyaddr(m, - IPSEC_DIR_OUTBOUND, - IP_FORWARDING, - &ipsecerror); - if (sp != NULL) { - /* count IPsec header size */ - ipsechdr = ipsec_hdrsiz(m, IPSEC_DIR_OUTBOUND, NULL); - - /* - * find the correct route for outer IPv4 - * header, compute tunnel MTU. - */ - if (sp->req != NULL && - sp->req->sav != NULL && - sp->req->sav->sah != NULL) { - ro = &sp->req->sav->sah->route_cache.sa_route; - if (ro->ro_rt && ro->ro_rt->rt_ifp) { - mtu = - ro->ro_rt->rt_rmx.rmx_mtu ? - ro->ro_rt->rt_rmx.rmx_mtu : - ro->ro_rt->rt_ifp->if_mtu; - mtu -= ipsechdr; - } - } - KEY_FREESP(&sp); - } -#endif /* IPSEC */ - /* XXX else case missing. */ - return mtu; -} -#endif diff --git a/freebsd/sys/netinet6/ip6_ipsec.h b/freebsd/sys/netinet6/ip6_ipsec.h deleted file mode 100644 index e335d850..00000000 --- a/freebsd/sys/netinet6/ip6_ipsec.h +++ /dev/null @@ -1,42 +0,0 @@ -/*- - * Copyright (c) 1982, 1986, 1988, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 4. 
Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#ifndef _NETINET_IP6_IPSEC_H_ -#define _NETINET_IP6_IPSEC_H_ - -int ip6_ipsec_filtertunnel(struct mbuf *); -int ip6_ipsec_fwd(struct mbuf *); -int ip6_ipsec_input(struct mbuf *, int); -int ip6_ipsec_output(struct mbuf **, struct inpcb *, int *); -#if 0 -int ip6_ipsec_mtu(struct mbuf *); -#endif -#endif diff --git a/freebsd/sys/netinet6/ip6_mroute.c b/freebsd/sys/netinet6/ip6_mroute.c index e40ce06c..0a597191 100644 --- a/freebsd/sys/netinet6/ip6_mroute.c +++ b/freebsd/sys/netinet6/ip6_mroute.c @@ -47,7 +47,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/netinet6/ip6_output.c b/freebsd/sys/netinet6/ip6_output.c index 63d1dac1..3be690a4 100644 --- a/freebsd/sys/netinet6/ip6_output.c +++ b/freebsd/sys/netinet6/ip6_output.c @@ -43,7 +43,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * @@ -67,6 +67,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include @@ -109,12 +110,7 @@ __FBSDID("$FreeBSD$"); #include #include -#ifdef IPSEC -#include -#include -#include -#include -#endif /* IPSEC */ +#include #ifdef SCTP #include #include @@ -337,6 +333,21 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, } } +#if defined(IPSEC) || defined(IPSEC_SUPPORT) + /* + * IPSec checking which handles several cases. + * FAST IPSEC: We re-injected the packet. + * XXX: need scope argument. + */ + if (IPSEC_ENABLED(ipv6)) { + if ((error = IPSEC_OUTPUT(ipv6, m, inp)) != 0) { + if (error == EINPROGRESS) + error = 0; + goto done; + } + } +#endif /* IPSEC */ + bzero(&exthdrs, sizeof(exthdrs)); if (opt) { /* Hop-by-Hop options header */ @@ -361,24 +372,6 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2); } -#ifdef IPSEC - /* - * IPSec checking which handles several cases. - * FAST IPSEC: We re-injected the packet. - * XXX: need scope argument. 
- */ - switch(ip6_ipsec_output(&m, inp, &error)) - { - case 1: /* Bad packet */ - goto freehdrs; - case -1: /* IPSec done */ - goto done; - case 0: /* No IPSec */ - default: - break; - } -#endif /* IPSEC */ - /* * Calculate the total length of the extension header chain. * Keep the length of the unfragmentable part for fragmentation. @@ -503,8 +496,7 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, if (ro == NULL) { ro = &ip6route; bzero((caddr_t)ro, sizeof(*ro)); - } else - ro->ro_flags |= RT_LLE_CACHE; + } ro_pmtu = ro; if (opt && opt->ip6po_rthdr) ro = &opt->ip6po_route; @@ -956,8 +948,23 @@ passout: m->m_pkthdr.len); ifa_free(&ia6->ia_ifa); } +#ifdef RATELIMIT + if (inp != NULL) { + if (inp->inp_flags2 & INP_RATE_LIMIT_CHANGED) + in_pcboutput_txrtlmt(inp, ifp, m); + /* stamp send tag on mbuf */ + m->m_pkthdr.snd_tag = inp->inp_snd_tag; + } else { + m->m_pkthdr.snd_tag = NULL; + } +#endif error = nd6_output_ifp(ifp, origifp, m, dst, (struct route *)ro); +#ifdef RATELIMIT + /* check for route change */ + if (error == EAGAIN) + in_pcboutput_eagain(inp); +#endif goto done; } @@ -1056,8 +1063,23 @@ sendorfree: counter_u64_add(ia->ia_ifa.ifa_obytes, m->m_pkthdr.len); } +#ifdef RATELIMIT + if (inp != NULL) { + if (inp->inp_flags2 & INP_RATE_LIMIT_CHANGED) + in_pcboutput_txrtlmt(inp, ifp, m); + /* stamp send tag on mbuf */ + m->m_pkthdr.snd_tag = inp->inp_snd_tag; + } else { + m->m_pkthdr.snd_tag = NULL; + } +#endif error = nd6_output_ifp(ifp, origifp, m, dst, (struct route *)ro); +#ifdef RATELIMIT + /* check for route change */ + if (error == EAGAIN) + in_pcboutput_eagain(inp); +#endif } else m_freem(m); } @@ -1443,6 +1465,16 @@ ip6_ctloutput(struct socket *so, struct sockopt *sopt) INP_WUNLOCK(in6p); error = 0; break; + case SO_MAX_PACING_RATE: +#ifdef RATELIMIT + INP_WLOCK(in6p); + in6p->inp_flags2 |= INP_RATE_LIMIT_CHANGED; + INP_WUNLOCK(in6p); + error = 0; +#else + error = EOPNOTSUPP; +#endif + break; default: break; } @@ -1514,6 +1546,7 @@ 
ip6_ctloutput(struct socket *so, struct sockopt *sopt) #endif case IPV6_V6ONLY: case IPV6_AUTOFLOWLABEL: + case IPV6_ORIGDSTADDR: case IPV6_BINDANY: case IPV6_BINDMULTI: #ifdef RSS @@ -1699,6 +1732,9 @@ do { \ OPTSET(IN6P_AUTOFLOWLABEL); break; + case IPV6_ORIGDSTADDR: + OPTSET2(INP_ORIGDSTADDR, optval); + break; case IPV6_BINDANY: OPTSET(INP_BINDANY); break; @@ -1873,23 +1909,13 @@ do { \ INP_WUNLOCK(in6p); break; -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) case IPV6_IPSEC_POLICY: - { - caddr_t req; - struct mbuf *m; - - if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */ + if (IPSEC_ENABLED(ipv6)) { + error = IPSEC_PCBCTL(ipv6, in6p, sopt); break; - if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */ - break; - req = mtod(m, caddr_t); - error = ipsec_set_policy(in6p, optname, req, - m->m_len, (sopt->sopt_td != NULL) ? - sopt->sopt_td->td_ucred : NULL); - m_freem(m); - break; - } + } + /* FALLTHROUGH */ #endif /* IPSEC */ default: @@ -1997,6 +2023,10 @@ do { \ optval = OPTBIT(IN6P_AUTOFLOWLABEL); break; + case IPV6_ORIGDSTADDR: + optval = OPTBIT2(INP_ORIGDSTADDR); + break; + case IPV6_BINDANY: optval = OPTBIT(INP_BINDANY); break; @@ -2114,37 +2144,14 @@ do { \ error = ip6_getmoptions(in6p, sopt); break; -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) case IPV6_IPSEC_POLICY: - { - caddr_t req = NULL; - size_t len = 0; - struct mbuf *m = NULL; - struct mbuf **mp = &m; - size_t ovalsize = sopt->sopt_valsize; - caddr_t oval = (caddr_t)sopt->sopt_val; - - error = soopt_getm(sopt, &m); /* XXX */ - if (error != 0) + if (IPSEC_ENABLED(ipv6)) { + error = IPSEC_PCBCTL(ipv6, in6p, sopt); break; - error = soopt_mcopyin(sopt, m); /* XXX */ - if (error != 0) - break; - sopt->sopt_valsize = ovalsize; - sopt->sopt_val = oval; - if (m) { - req = mtod(m, caddr_t); - len = m->m_len; } - error = ipsec_get_policy(in6p, req, len, mp); - if (error == 0) - error = soopt_mcopyout(sopt, m); /* XXX */ - if (error == 0 && m) - m_freem(m); - break; - } + /* 
FALLTHROUGH */ #endif /* IPSEC */ - default: error = ENOPROTOOPT; break; diff --git a/freebsd/sys/netinet6/ip6_var.h b/freebsd/sys/netinet6/ip6_var.h index e52a3206..65bf1e69 100644 --- a/freebsd/sys/netinet6/ip6_var.h +++ b/freebsd/sys/netinet6/ip6_var.h @@ -41,7 +41,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/netinet6/ip6protosw.h b/freebsd/sys/netinet6/ip6protosw.h index 9e80a698..edfbd0ab 100644 --- a/freebsd/sys/netinet6/ip6protosw.h +++ b/freebsd/sys/netinet6/ip6protosw.h @@ -41,7 +41,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/netinet6/mld6.c b/freebsd/sys/netinet6/mld6.c index 26efa852..420b2b4b 100644 --- a/freebsd/sys/netinet6/mld6.c +++ b/freebsd/sys/netinet6/mld6.c @@ -46,7 +46,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/netinet6/nd6.c b/freebsd/sys/netinet6/nd6.c index 757130b8..cb626e5a 100644 --- a/freebsd/sys/netinet6/nd6.c +++ b/freebsd/sys/netinet6/nd6.c @@ -159,6 +159,7 @@ nd6_lle_event(void *arg __unused, struct llentry *lle, int evt) struct sockaddr_dl gw; struct ifnet *ifp; int type; + int fibnum; LLE_WLOCK_ASSERT(lle); @@ -196,8 +197,9 @@ nd6_lle_event(void *arg __unused, struct llentry *lle, int evt) rtinfo.rti_info[RTAX_DST] = (struct sockaddr *)&dst; rtinfo.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&gw; rtinfo.rti_addrs = RTA_DST | RTA_GATEWAY; + fibnum = V_rt_add_addr_allfibs ? RT_ALL_FIBS : ifp->if_fib; rt_missmsg_fib(type, &rtinfo, RTF_HOST | RTF_LLDATA | ( - type == RTM_ADD ? RTF_UP: 0), 0, RT_DEFAULT_FIB); + type == RTM_ADD ? RTF_UP: 0), 0, fibnum); } /* @@ -1202,7 +1204,7 @@ nd6_purge(struct ifnet *ifp) if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) { /* Refresh default router list. */ - defrouter_select(); + defrouter_select_fib(ifp->if_fib); } } @@ -1255,7 +1257,7 @@ static int nd6_is_new_addr_neighbor(const struct sockaddr_in6 *addr, struct ifnet *ifp) { struct nd_prefix *pr; - struct ifaddr *dstaddr; + struct ifaddr *ifa; struct rt_addrinfo info; struct sockaddr_in6 rt_key; const struct sockaddr *dst6; @@ -1289,9 +1291,6 @@ nd6_is_new_addr_neighbor(const struct sockaddr_in6 *addr, struct ifnet *ifp) bzero(&info, sizeof(info)); info.rti_info[RTAX_DST] = (struct sockaddr *)&rt_key; - /* Always use the default FIB here. XXME - why? */ - fibnum = RT_DEFAULT_FIB; - /* * If the address matches one of our addresses, * it should be a neighbor. @@ -1305,19 +1304,31 @@ restart: continue; if ((pr->ndpr_stateflags & NDPRF_ONLINK) == 0) { - /* Always use the default FIB here. 
*/ dst6 = (const struct sockaddr *)&pr->ndpr_prefix; - genid = V_nd6_list_genid; - ND6_RUNLOCK(); - - /* Restore length field before retrying lookup */ - rt_key.sin6_len = sizeof(rt_key); - error = rib_lookup_info(fibnum, dst6, 0, 0, &info); + /* + * We only need to check all FIBs if add_addr_allfibs + * is unset. If set, checking any FIB will suffice. + */ + fibnum = V_rt_add_addr_allfibs ? rt_numfibs - 1 : 0; + for (; fibnum < rt_numfibs; fibnum++) { + genid = V_nd6_list_genid; + ND6_RUNLOCK(); - ND6_RLOCK(); - if (genid != V_nd6_list_genid) - goto restart; + /* + * Restore length field before + * retrying lookup + */ + rt_key.sin6_len = sizeof(rt_key); + error = rib_lookup_info(fibnum, dst6, 0, 0, + &info); + + ND6_RLOCK(); + if (genid != V_nd6_list_genid) + goto restart; + if (error == 0) + break; + } if (error != 0) continue; @@ -1348,13 +1359,18 @@ restart: * If the address is assigned on the node of the other side of * a p2p interface, the address should be a neighbor. */ - dstaddr = ifa_ifwithdstaddr((const struct sockaddr *)addr, RT_ALL_FIBS); - if (dstaddr != NULL) { - if (dstaddr->ifa_ifp == ifp) { - ifa_free(dstaddr); - return (1); + if (ifp->if_flags & IFF_POINTOPOINT) { + IF_ADDR_RLOCK(ifp); + TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + if (ifa->ifa_addr->sa_family != addr->sin6_family) + continue; + if (ifa->ifa_dstaddr != NULL && + sa_equal(addr, ifa->ifa_dstaddr)) { + IF_ADDR_RUNLOCK(ifp); + return 1; + } } - ifa_free(dstaddr); + IF_ADDR_RUNLOCK(ifp); } /* @@ -1487,7 +1503,7 @@ nd6_free(struct llentry **lnp, int gc) /* * We need to unlock to avoid a LOR with rt6_flush() with the * rnh and for the calls to pfxlist_onlink_check() and - * defrouter_select() in the block further down for calls + * defrouter_select_fib() in the block further down for calls * into nd6_lookup(). We still hold a ref. 
*/ LLE_WUNLOCK(ln); @@ -1502,7 +1518,7 @@ nd6_free(struct llentry **lnp, int gc) if (dr) { /* - * Since defrouter_select() does not affect the + * Since defrouter_select_fib() does not affect the * on-link determination and MIP6 needs the check * before the default router selection, we perform * the check now. @@ -1512,7 +1528,7 @@ nd6_free(struct llentry **lnp, int gc) /* * Refresh default router list. */ - defrouter_select(); + defrouter_select_fib(dr->ifp->if_fib); } /* @@ -2106,11 +2122,11 @@ nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr, * Question: can we restrict the first condition to the "is_newentry" * case? * XXX: when we hear an RA from a new router with the link-layer - * address option, defrouter_select() is called twice, since + * address option, defrouter_select_fib() is called twice, since * defrtrlist_update called the function as well. However, I believe * we can compromise the overhead, since it only happens the first * time. - * XXX: although defrouter_select() should not have a bad effect + * XXX: although defrouter_select_fib() should not have a bad effect * for those are not autoconfigured hosts, we explicitly avoid such * cases for safety. 
*/ @@ -2119,7 +2135,7 @@ nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr, /* * guaranteed recursion */ - defrouter_select(); + defrouter_select_fib(ifp->if_fib); } } @@ -2261,7 +2277,6 @@ nd6_resolve(struct ifnet *ifp, int is_gw, struct mbuf *m, case IFT_ETHER: case IFT_FDDI: case IFT_L2VLAN: - case IFT_IEEE80211: case IFT_BRIDGE: case IFT_ISO88025: ETHER_MAP_IPV6_MULTICAST(&dst6->sin6_addr, @@ -2529,7 +2544,6 @@ nd6_need_cache(struct ifnet *ifp) case IFT_FDDI: case IFT_IEEE1394: case IFT_L2VLAN: - case IFT_IEEE80211: case IFT_INFINIBAND: case IFT_BRIDGE: case IFT_PROPVIRTUAL: diff --git a/freebsd/sys/netinet6/nd6.h b/freebsd/sys/netinet6/nd6.h index 9b9fa3d1..243e9548 100644 --- a/freebsd/sys/netinet6/nd6.h +++ b/freebsd/sys/netinet6/nd6.h @@ -469,6 +469,7 @@ void nd6_dad_stop(struct ifaddr *); void nd6_rs_input(struct mbuf *, int, int); void nd6_ra_input(struct mbuf *, int, int); void defrouter_reset(void); +void defrouter_select_fib(int fibnum); void defrouter_select(void); void defrouter_ref(struct nd_defrouter *); void defrouter_rele(struct nd_defrouter *); diff --git a/freebsd/sys/netinet6/nd6_nbr.c b/freebsd/sys/netinet6/nd6_nbr.c index e30dca8e..4fece39e 100644 --- a/freebsd/sys/netinet6/nd6_nbr.c +++ b/freebsd/sys/netinet6/nd6_nbr.c @@ -264,8 +264,7 @@ nd6_ns_input(struct mbuf *m, int off, int icmp6len) bzero(&info, sizeof(info)); info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&rt_gateway; - /* Always use the default FIB. 
*/ - if (rib_lookup_info(RT_DEFAULT_FIB, (struct sockaddr *)&dst6, + if (rib_lookup_info(ifp->if_fib, (struct sockaddr *)&dst6, 0, 0, &info) == 0) { if ((info.rti_flags & RTF_ANNOUNCE) != 0 && rt_gateway.sdl_family == AF_LINK) { @@ -487,7 +486,7 @@ nd6_ns_output_fib(struct ifnet *ifp, const struct in6_addr *saddr6, uint32_t scopeid; in6_splitscope(&ip6->ip6_dst, &dst6, &scopeid); - error = in6_selectsrc_addr(RT_DEFAULT_FIB, &dst6, + error = in6_selectsrc_addr(fibnum, &dst6, scopeid, ifp, &src6, NULL); if (error) { char ip6buf[INET6_ADDRSTRLEN]; @@ -984,7 +983,7 @@ nd6_na_output_fib(struct ifnet *ifp, const struct in6_addr *daddr6_0, * Select a source whose scope is the same as that of the dest. */ in6_splitscope(&daddr6, &dst6, &scopeid); - error = in6_selectsrc_addr(RT_DEFAULT_FIB, &dst6, + error = in6_selectsrc_addr(fibnum, &dst6, scopeid, ifp, &src6, NULL); if (error) { char ip6buf[INET6_ADDRSTRLEN]; @@ -1088,7 +1087,6 @@ nd6_ifptomac(struct ifnet *ifp) case IFT_FDDI: case IFT_IEEE1394: case IFT_L2VLAN: - case IFT_IEEE80211: case IFT_INFINIBAND: case IFT_BRIDGE: case IFT_ISO88025: @@ -1459,7 +1457,6 @@ nd6_dad_duplicated(struct ifaddr *ifa, struct dadq *dp) case IFT_FDDI: case IFT_ATM: case IFT_IEEE1394: - case IFT_IEEE80211: case IFT_INFINIBAND: in6 = ia->ia_addr.sin6_addr; if (in6_get_hw_ifid(ifp, &in6) == 0 && diff --git a/freebsd/sys/netinet6/nd6_rtr.c b/freebsd/sys/netinet6/nd6_rtr.c index f1ef143e..6e8d330f 100644 --- a/freebsd/sys/netinet6/nd6_rtr.c +++ b/freebsd/sys/netinet6/nd6_rtr.c @@ -502,7 +502,7 @@ defrouter_addreq(struct nd_defrouter *new) error = in6_rtrequest(RTM_ADD, (struct sockaddr *)&def, (struct sockaddr *)&gate, (struct sockaddr *)&mask, - RTF_GATEWAY, &newrt, RT_DEFAULT_FIB); + RTF_GATEWAY, &newrt, new->ifp->if_fib); if (newrt) { nd6_rtmsg(RTM_ADD, newrt); /* tell user process */ RTFREE(newrt); @@ -553,8 +553,8 @@ defrouter_rele(struct nd_defrouter *dr) /* * Remove the default route for a given router. 
- * This is just a subroutine function for defrouter_select(), and should - * not be called from anywhere else. + * This is just a subroutine function for defrouter_select_fib(), and + * should not be called from anywhere else. */ static void defrouter_delreq(struct nd_defrouter *dr) @@ -573,7 +573,7 @@ defrouter_delreq(struct nd_defrouter *dr) in6_rtrequest(RTM_DELETE, (struct sockaddr *)&def, (struct sockaddr *)&gate, - (struct sockaddr *)&mask, RTF_GATEWAY, &oldrt, RT_DEFAULT_FIB); + (struct sockaddr *)&mask, RTF_GATEWAY, &oldrt, dr->ifp->if_fib); if (oldrt) { nd6_rtmsg(RTM_DELETE, oldrt); RTFREE(oldrt); @@ -700,11 +700,11 @@ defrouter_del(struct nd_defrouter *dr) /* * If the router is the primary one, choose a new one. - * Note that defrouter_select() will remove the current gateway - * from the routing table. + * Note that defrouter_select_fib() will remove the current + * gateway from the routing table. */ if (deldr) - defrouter_select(); + defrouter_select_fib(deldr->ifp->if_fib); /* * Release the list reference. @@ -732,13 +732,23 @@ defrouter_del(struct nd_defrouter *dr) * even when the multipath routing is available, because we're not sure about * the benefits for stub hosts comparing to the risk of making the code * complicated and the possibility of introducing bugs. + * + * We maintain a single list of routers for multiple FIBs, only considering one + * at a time based on the receiving interface's FIB. If @fibnum is RT_ALL_FIBS, + * we do the whole thing multiple times. 
*/ void -defrouter_select(void) +defrouter_select_fib(int fibnum) { struct nd_defrouter *dr, *selected_dr, *installed_dr; struct llentry *ln = NULL; + if (fibnum == RT_ALL_FIBS) { + for (fibnum = 0; fibnum < rt_numfibs; fibnum++) { + defrouter_select_fib(fibnum); + } + } + ND6_RLOCK(); /* * Let's handle easy case (3) first: @@ -757,7 +767,7 @@ defrouter_select(void) selected_dr = installed_dr = NULL; TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) { IF_AFDATA_RLOCK(dr->ifp); - if (selected_dr == NULL && + if (selected_dr == NULL && dr->ifp->if_fib == fibnum && (ln = nd6_lookup(&dr->rtaddr, 0, dr->ifp)) && ND6_IS_LLINFO_PROBREACH(ln)) { selected_dr = dr; @@ -769,14 +779,17 @@ defrouter_select(void) ln = NULL; } - if (dr->installed) { + if (dr->installed && dr->ifp->if_fib == fibnum) { if (installed_dr == NULL) { installed_dr = dr; defrouter_ref(installed_dr); } else { - /* this should not happen. warn for diagnosis. */ - log(LOG_ERR, - "defrouter_select: more than one router is installed\n"); + /* + * this should not happen. + * warn for diagnosis. + */ + log(LOG_ERR, "defrouter_select_fib: more than " + "one router is installed\n"); } } } @@ -791,14 +804,24 @@ defrouter_select(void) if (selected_dr == NULL) { if (installed_dr == NULL || TAILQ_NEXT(installed_dr, dr_entry) == NULL) - selected_dr = TAILQ_FIRST(&V_nd_defrouter); + dr = TAILQ_FIRST(&V_nd_defrouter); else - selected_dr = TAILQ_NEXT(installed_dr, dr_entry); - defrouter_ref(selected_dr); + dr = TAILQ_NEXT(installed_dr, dr_entry); + + /* Ensure we select a router for this FIB. 
*/ + TAILQ_FOREACH_FROM(dr, &V_nd_defrouter, dr_entry) { + if (dr->ifp->if_fib == fibnum) { + selected_dr = dr; + defrouter_ref(selected_dr); + break; + } + } } else if (installed_dr != NULL) { IF_AFDATA_RLOCK(installed_dr->ifp); - if ((ln = nd6_lookup(&installed_dr->rtaddr, 0, installed_dr->ifp)) && + if ((ln = nd6_lookup(&installed_dr->rtaddr, 0, + installed_dr->ifp)) && ND6_IS_LLINFO_PROBREACH(ln) && + installed_dr->ifp->if_fib == fibnum && rtpref(selected_dr) <= rtpref(installed_dr)) { defrouter_rele(selected_dr); selected_dr = installed_dr; @@ -810,18 +833,30 @@ defrouter_select(void) ND6_RUNLOCK(); /* - * If the selected router is different than the installed one, - * remove the installed router and install the selected one. - * Note that the selected router is never NULL here. + * If we selected a router for this FIB and it's different + * than the installed one, remove the installed router and + * install the selected one in its place. */ if (installed_dr != selected_dr) { if (installed_dr != NULL) { defrouter_delreq(installed_dr); defrouter_rele(installed_dr); } - defrouter_addreq(selected_dr); + if (selected_dr != NULL) + defrouter_addreq(selected_dr); } - defrouter_rele(selected_dr); + if (selected_dr != NULL) + defrouter_rele(selected_dr); +} + +/* + * Maintain old KPI for default router selection. + * If unspecified, we can re-select routers for all FIBs. + */ +void +defrouter_select(void) +{ + defrouter_select_fib(RT_ALL_FIBS); } /* @@ -944,7 +979,7 @@ restart: V_nd6_list_genid++; ND6_WUNLOCK(); - defrouter_select(); + defrouter_select_fib(new->ifp->if_fib); return (n); } @@ -1733,7 +1768,7 @@ nd6_prefix_onlink_rtrequest(struct nd_prefix *pr, struct ifaddr *ifa) struct rtentry *rt; struct sockaddr_in6 mask6; u_long rtflags; - int error, a_failure, fibnum; + int error, a_failure, fibnum, maxfib; /* * in6_ifinit() sets nd6_rtrequest to ifa_rtrequest for all ifaddrs. 
@@ -1744,8 +1779,15 @@ nd6_prefix_onlink_rtrequest(struct nd_prefix *pr, struct ifaddr *ifa) mask6.sin6_addr = pr->ndpr_mask; rtflags = (ifa->ifa_flags & ~IFA_RTSELF) | RTF_UP; + if(V_rt_add_addr_allfibs) { + fibnum = 0; + maxfib = rt_numfibs; + } else { + fibnum = ifa->ifa_ifp->if_fib; + maxfib = fibnum + 1; + } a_failure = 0; - for (fibnum = 0; fibnum < rt_numfibs; fibnum++) { + for (; fibnum < maxfib; fibnum++) { rt = NULL; error = in6_rtrequest(RTM_ADD, @@ -1833,6 +1875,10 @@ nd6_prefix_onlink(struct nd_prefix *pr) if ((opr->ndpr_stateflags & NDPRF_ONLINK) == 0) continue; + if (!V_rt_add_addr_allfibs && + opr->ndpr_ifp->if_fib != pr->ndpr_ifp->if_fib) + continue; + if (opr->ndpr_plen == pr->ndpr_plen && in6_are_prefix_equal(&pr->ndpr_prefix.sin6_addr, &opr->ndpr_prefix.sin6_addr, pr->ndpr_plen)) { @@ -1893,7 +1939,7 @@ nd6_prefix_offlink(struct nd_prefix *pr) struct rtentry *rt; char ip6buf[INET6_ADDRSTRLEN]; uint64_t genid; - int fibnum, a_failure; + int fibnum, maxfib, a_failure; ND6_ONLINK_LOCK_ASSERT(); ND6_UNLOCK_ASSERT(); @@ -1911,8 +1957,16 @@ nd6_prefix_offlink(struct nd_prefix *pr) mask6.sin6_len = sizeof(sa6); bcopy(&pr->ndpr_mask, &mask6.sin6_addr, sizeof(struct in6_addr)); + if (V_rt_add_addr_allfibs) { + fibnum = 0; + maxfib = rt_numfibs; + } else { + fibnum = ifp->if_fib; + maxfib = fibnum + 1; + } + a_failure = 0; - for (fibnum = 0; fibnum < rt_numfibs; fibnum++) { + for (; fibnum < maxfib; fibnum++) { rt = NULL; error = in6_rtrequest(RTM_DELETE, (struct sockaddr *)&sa6, NULL, (struct sockaddr *)&mask6, 0, &rt, fibnum); diff --git a/freebsd/sys/netinet6/raw_ip6.c b/freebsd/sys/netinet6/raw_ip6.c index 5b0577d3..0c73429b 100644 --- a/freebsd/sys/netinet6/raw_ip6.c +++ b/freebsd/sys/netinet6/raw_ip6.c @@ -42,7 +42,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. 
Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * @@ -106,10 +106,7 @@ __FBSDID("$FreeBSD$"); #include #include -#ifdef IPSEC -#include -#include -#endif /* IPSEC */ +#include #include @@ -171,7 +168,7 @@ rip6_input(struct mbuf **mp, int *offp, int proto) RIP6STAT_INC(rip6s_ipackets); - init_sin6(&fromsa, m); /* general init */ + init_sin6(&fromsa, m, 0); /* general init */ ifp = m->m_pkthdr.rcvif; @@ -260,14 +257,18 @@ rip6_input(struct mbuf **mp, int *offp, int proto) if (last != NULL) { struct mbuf *n = m_copym(m, 0, M_COPYALL, M_NOWAIT); -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) /* * Check AH/ESP integrity. */ - if (n && ipsec6_in_reject(n, last)) { - m_freem(n); - /* Do not inject data into pcb. */ - } else + if (IPSEC_ENABLED(ipv6)) { + if (n != NULL && + IPSEC_CHECK_POLICY(ipv6, n, last) != 0) { + m_freem(n); + /* Do not inject data into pcb. */ + n = NULL; + } + } #endif /* IPSEC */ if (n) { if (last->inp_flags & INP_CONTROLOPTS || @@ -291,11 +292,12 @@ rip6_input(struct mbuf **mp, int *offp, int proto) last = in6p; } INP_INFO_RUNLOCK(&V_ripcbinfo); -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) /* * Check AH/ESP integrity. */ - if ((last != NULL) && ipsec6_in_reject(m, last)) { + if (IPSEC_ENABLED(ipv6) && last != NULL && + IPSEC_CHECK_POLICY(ipv6, m, last) != 0) { m_freem(m); IP6STAT_DEC(ip6s_delivered); /* Do not inject data into pcb. 
*/ diff --git a/freebsd/sys/netinet6/sctp6_usrreq.c b/freebsd/sys/netinet6/sctp6_usrreq.c index 751c18fd..03e20b18 100644 --- a/freebsd/sys/netinet6/sctp6_usrreq.c +++ b/freebsd/sys/netinet6/sctp6_usrreq.c @@ -57,11 +57,6 @@ __FBSDID("$FreeBSD$"); #include #include -#ifdef IPSEC -#include -#include -#endif /* IPSEC */ - extern struct protosw inetsw[]; int @@ -560,10 +555,6 @@ sctp6_attach(struct socket *so, int proto SCTP_UNUSED, struct thread *p SCTP_UNU */ inp6->inp_ip_ttl = MODULE_GLOBAL(ip_defttl); #endif - /* - * Hmm what about the IPSEC stuff that is missing here but in - * sctp_attach()? - */ SCTP_INP_WUNLOCK(inp); return (0); } diff --git a/freebsd/sys/netinet6/tcp6_var.h b/freebsd/sys/netinet6/tcp6_var.h index 5cb04f99..1ef1eb95 100644 --- a/freebsd/sys/netinet6/tcp6_var.h +++ b/freebsd/sys/netinet6/tcp6_var.h @@ -39,7 +39,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/netinet6/udp6_usrreq.c b/freebsd/sys/netinet6/udp6_usrreq.c index 2f950e62..0f5c6bd9 100644 --- a/freebsd/sys/netinet6/udp6_usrreq.c +++ b/freebsd/sys/netinet6/udp6_usrreq.c @@ -50,7 +50,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * @@ -122,10 +122,7 @@ __FBSDID("$FreeBSD$"); #include #include -#ifdef IPSEC -#include -#include -#endif /* IPSEC */ +#include #include @@ -142,7 +139,7 @@ udp6_append(struct inpcb *inp, struct mbuf *n, int off, struct sockaddr_in6 *fromsa) { struct socket *so; - struct mbuf *opts; + struct mbuf *opts = NULL, *tmp_opts; struct udpcb *up; INP_LOCK_ASSERT(inp); @@ -154,16 +151,18 @@ udp6_append(struct inpcb *inp, struct mbuf *n, int off, if (up->u_tun_func != NULL) { in_pcbref(inp); INP_RUNLOCK(inp); - (*up->u_tun_func)(n, off, inp, (struct sockaddr *)fromsa, + (*up->u_tun_func)(n, off, inp, (struct sockaddr *)&fromsa[0], up->u_tun_ctx); INP_RLOCK(inp); return (in_pcbrele_rlocked(inp)); } -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) /* Check AH/ESP integrity. */ - if (ipsec6_in_reject(n, inp)) { - m_freem(n); - return (0); + if (IPSEC_ENABLED(ipv6)) { + if (IPSEC_CHECK_POLICY(ipv6, n, inp) != 0) { + m_freem(n); + return (0); + } } #endif /* IPSEC */ #ifdef MAC @@ -176,11 +175,23 @@ udp6_append(struct inpcb *inp, struct mbuf *n, int off, if (inp->inp_flags & INP_CONTROLOPTS || inp->inp_socket->so_options & SO_TIMESTAMP) ip6_savecontrol(inp, n, &opts); + if ((inp->inp_vflag & INP_IPV6) && (inp->inp_flags2 & INP_ORIGDSTADDR)) { + tmp_opts = sbcreatecontrol((caddr_t)&fromsa[1], + sizeof(struct sockaddr_in6), IPV6_ORIGDSTADDR, IPPROTO_IPV6); + if (tmp_opts) { + if (opts) { + tmp_opts->m_next = opts; + opts = tmp_opts; + } else + opts = tmp_opts; + } + + } m_adj(n, off + sizeof(struct udphdr)); so = inp->inp_socket; SOCKBUF_LOCK(&so->so_rcv); - if (sbappendaddr_locked(&so->so_rcv, (struct sockaddr *)fromsa, n, + if (sbappendaddr_locked(&so->so_rcv, (struct sockaddr *)&fromsa[0], n, opts) == 0) { SOCKBUF_UNLOCK(&so->so_rcv); m_freem(n); @@ -205,7 +216,7 @@ udp6_input(struct 
mbuf **mp, int *offp, int proto) int off = *offp; int cscov_partial; int plen, ulen; - struct sockaddr_in6 fromsa; + struct sockaddr_in6 fromsa[2]; struct m_tag *fwd_tag; uint16_t uh_sum; uint8_t nxt; @@ -280,8 +291,10 @@ udp6_input(struct mbuf **mp, int *offp, int proto) /* * Construct sockaddr format source address. */ - init_sin6(&fromsa, m); - fromsa.sin6_port = uh->uh_sport; + init_sin6(&fromsa[0], m, 0); + fromsa[0].sin6_port = uh->uh_sport; + init_sin6(&fromsa[1], m, 1); + fromsa[1].sin6_port = uh->uh_dport; pcbinfo = udp_get_inpcbinfo(nxt); if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { @@ -352,7 +365,7 @@ udp6_input(struct mbuf **mp, int *offp, int proto) blocked = im6o_mc_filter(imo, ifp, (struct sockaddr *)&mcaddr, - (struct sockaddr *)&fromsa); + (struct sockaddr *)&fromsa[0]); if (blocked != MCAST_PASS) { if (blocked == MCAST_NOTGMEMBER) IP6STAT_INC(ip6s_notmember); @@ -373,7 +386,7 @@ udp6_input(struct mbuf **mp, int *offp, int proto) INP_RLOCK(last); UDP_PROBE(receive, NULL, last, ip6, last, uh); - if (udp6_append(last, n, off, &fromsa)) + if (udp6_append(last, n, off, fromsa)) goto inp_lost; INP_RUNLOCK(last); } @@ -405,7 +418,7 @@ udp6_input(struct mbuf **mp, int *offp, int proto) INP_RLOCK(last); INP_INFO_RUNLOCK(pcbinfo); UDP_PROBE(receive, NULL, last, ip6, last, uh); - if (udp6_append(last, m, off, &fromsa) == 0) + if (udp6_append(last, m, off, fromsa) == 0) INP_RUNLOCK(last); inp_lost: return (IPPROTO_DONE); @@ -485,7 +498,7 @@ udp6_input(struct mbuf **mp, int *offp, int proto) } } UDP_PROBE(receive, NULL, inp, ip6, inp, uh); - if (udp6_append(inp, m, off, &fromsa) == 0) + if (udp6_append(inp, m, off, fromsa) == 0) INP_RUNLOCK(inp); return (IPPROTO_DONE); diff --git a/freebsd/sys/netinet6/udp6_var.h b/freebsd/sys/netinet6/udp6_var.h index cdab98b0..8a2afa38 100644 --- a/freebsd/sys/netinet6/udp6_var.h +++ b/freebsd/sys/netinet6/udp6_var.h @@ -40,7 +40,7 @@ * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/netipsec/ah_var.h b/freebsd/sys/netipsec/ah_var.h index 812fe2dc..9b992c07 100644 --- a/freebsd/sys/netipsec/ah_var.h +++ b/freebsd/sys/netipsec/ah_var.h @@ -48,37 +48,39 @@ #define AH_ALG_MAX 16 struct ahstat { - u_int32_t ahs_hdrops; /* Packet shorter than header shows */ - u_int32_t ahs_nopf; /* Protocol family not supported */ - u_int32_t ahs_notdb; - u_int32_t ahs_badkcr; - u_int32_t ahs_badauth; - u_int32_t ahs_noxform; - u_int32_t ahs_qfull; - u_int32_t ahs_wrap; - u_int32_t ahs_replay; - u_int32_t ahs_badauthl; /* Bad authenticator length */ - u_int32_t ahs_input; /* Input AH packets */ - u_int32_t ahs_output; /* Output AH packets */ - u_int32_t ahs_invalid; /* Trying to use an invalid TDB */ - u_int64_t ahs_ibytes; /* Input bytes */ - u_int64_t ahs_obytes; /* Output bytes */ - u_int32_t ahs_toobig; /* Packet got larger than IP_MAXPACKET */ - u_int32_t ahs_pdrops; /* Packet blocked due to policy */ - u_int32_t ahs_crypto; /* Crypto processing failure */ - u_int32_t ahs_tunnel; /* Tunnel sanity check failure */ - u_int32_t ahs_hist[AH_ALG_MAX]; /* Per-algorithm op count */ + uint64_t ahs_hdrops; /* Packet shorter than header shows */ + uint64_t ahs_nopf; /* Protocol family not supported */ + uint64_t ahs_notdb; + uint64_t ahs_badkcr; + uint64_t ahs_badauth; + uint64_t ahs_noxform; + uint64_t ahs_qfull; + uint64_t ahs_wrap; + uint64_t ahs_replay; + uint64_t ahs_badauthl; /* Bad authenticator length */ + uint64_t ahs_input; /* Input AH packets */ + uint64_t 
ahs_output; /* Output AH packets */ + uint64_t ahs_invalid; /* Trying to use an invalid TDB */ + uint64_t ahs_ibytes; /* Input bytes */ + uint64_t ahs_obytes; /* Output bytes */ + uint64_t ahs_toobig; /* Packet got larger than IP_MAXPACKET */ + uint64_t ahs_pdrops; /* Packet blocked due to policy */ + uint64_t ahs_crypto; /* Crypto processing failure */ + uint64_t ahs_tunnel; /* Tunnel sanity check failure */ + uint64_t ahs_hist[AH_ALG_MAX]; /* Per-algorithm op count */ }; #ifdef _KERNEL +#include + VNET_DECLARE(int, ah_enable); VNET_DECLARE(int, ah_cleartos); -VNET_DECLARE(struct ahstat, ahstat); +VNET_PCPUSTAT_DECLARE(struct ahstat, ahstat); -#define AHSTAT_ADD(name, val) V_ahstat.name += (val) +#define AHSTAT_ADD(name, val) \ + VNET_PCPUSTAT_ADD(struct ahstat, ahstat, name , (val)) #define AHSTAT_INC(name) AHSTAT_ADD(name, 1) #define V_ah_enable VNET(ah_enable) #define V_ah_cleartos VNET(ah_cleartos) -#define V_ahstat VNET(ahstat) #endif /* _KERNEL */ #endif /*_NETIPSEC_AH_VAR_H_*/ diff --git a/freebsd/sys/netipsec/esp.h b/freebsd/sys/netipsec/esp.h index eb373970..8eb09630 100644 --- a/freebsd/sys/netipsec/esp.h +++ b/freebsd/sys/netipsec/esp.h @@ -42,8 +42,7 @@ struct esp { /*variable size, 32bit bound*/ /* Initialization Vector */ /*variable size*/ /* Payload data */ /*variable size*/ /* padding */ - /*8bit*/ /* pad size */ - /*8bit*/ /* next header */ + /*8bit*/ /* pad length */ /*8bit*/ /* next header */ /*variable size, 32bit bound*/ /* Authentication data (new IPsec) */ }; @@ -53,8 +52,7 @@ struct newesp { u_int32_t esp_seq; /* Sequence number */ /*variable size*/ /* (IV and) Payload data */ /*variable size*/ /* padding */ - /*8bit*/ /* pad size */ - /*8bit*/ /* next header */ + /*8bit*/ /* pad length */ /*8bit*/ /* next header */ /*variable size, 32bit bound*/ /* Authentication data */ }; diff --git a/freebsd/sys/netipsec/esp_var.h b/freebsd/sys/netipsec/esp_var.h index c6133614..48240418 100644 --- a/freebsd/sys/netipsec/esp_var.h +++ 
b/freebsd/sys/netipsec/esp_var.h @@ -48,36 +48,38 @@ #define ESP_ALG_MAX 256 /* NB: could be < but skipjack is 249 */ struct espstat { - u_int32_t esps_hdrops; /* Packet shorter than header shows */ - u_int32_t esps_nopf; /* Protocol family not supported */ - u_int32_t esps_notdb; - u_int32_t esps_badkcr; - u_int32_t esps_qfull; - u_int32_t esps_noxform; - u_int32_t esps_badilen; - u_int32_t esps_wrap; /* Replay counter wrapped around */ - u_int32_t esps_badenc; /* Bad encryption detected */ - u_int32_t esps_badauth; /* Only valid for transforms with auth */ - u_int32_t esps_replay; /* Possible packet replay detected */ - u_int32_t esps_input; /* Input ESP packets */ - u_int32_t esps_output; /* Output ESP packets */ - u_int32_t esps_invalid; /* Trying to use an invalid TDB */ - u_int64_t esps_ibytes; /* Input bytes */ - u_int64_t esps_obytes; /* Output bytes */ - u_int32_t esps_toobig; /* Packet got larger than IP_MAXPACKET */ - u_int32_t esps_pdrops; /* Packet blocked due to policy */ - u_int32_t esps_crypto; /* Crypto processing failure */ - u_int32_t esps_tunnel; /* Tunnel sanity check failure */ - u_int32_t esps_hist[ESP_ALG_MAX]; /* Per-algorithm op count */ + uint64_t esps_hdrops; /* Packet shorter than header shows */ + uint64_t esps_nopf; /* Protocol family not supported */ + uint64_t esps_notdb; + uint64_t esps_badkcr; + uint64_t esps_qfull; + uint64_t esps_noxform; + uint64_t esps_badilen; + uint64_t esps_wrap; /* Replay counter wrapped around */ + uint64_t esps_badenc; /* Bad encryption detected */ + uint64_t esps_badauth; /* Only valid for transforms with auth */ + uint64_t esps_replay; /* Possible packet replay detected */ + uint64_t esps_input; /* Input ESP packets */ + uint64_t esps_output; /* Output ESP packets */ + uint64_t esps_invalid; /* Trying to use an invalid TDB */ + uint64_t esps_ibytes; /* Input bytes */ + uint64_t esps_obytes; /* Output bytes */ + uint64_t esps_toobig; /* Packet got larger than IP_MAXPACKET */ + uint64_t esps_pdrops; /* 
Packet blocked due to policy */ + uint64_t esps_crypto; /* Crypto processing failure */ + uint64_t esps_tunnel; /* Tunnel sanity check failure */ + uint64_t esps_hist[ESP_ALG_MAX]; /* Per-algorithm op count */ }; #ifdef _KERNEL +#include + VNET_DECLARE(int, esp_enable); -VNET_DECLARE(struct espstat, espstat); +VNET_PCPUSTAT_DECLARE(struct espstat, espstat); -#define ESPSTAT_ADD(name, val) V_espstat.name += (val) +#define ESPSTAT_ADD(name, val) \ + VNET_PCPUSTAT_ADD(struct espstat, espstat, name, (val)) #define ESPSTAT_INC(name) ESPSTAT_ADD(name, 1) #define V_esp_enable VNET(esp_enable) -#define V_espstat VNET(espstat) #endif /* _KERNEL */ #endif /*_NETIPSEC_ESP_VAR_H_*/ diff --git a/freebsd/sys/netipsec/ipcomp_var.h b/freebsd/sys/netipsec/ipcomp_var.h index ee15598f..5062c9dd 100644 --- a/freebsd/sys/netipsec/ipcomp_var.h +++ b/freebsd/sys/netipsec/ipcomp_var.h @@ -41,36 +41,37 @@ */ #define IPCOMP_ALG_MAX 8 -#define IPCOMPSTAT_VERSION 1 +#define IPCOMPSTAT_VERSION 2 struct ipcompstat { - u_int32_t ipcomps_hdrops; /* Packet shorter than header shows */ - u_int32_t ipcomps_nopf; /* Protocol family not supported */ - u_int32_t ipcomps_notdb; - u_int32_t ipcomps_badkcr; - u_int32_t ipcomps_qfull; - u_int32_t ipcomps_noxform; - u_int32_t ipcomps_wrap; - u_int32_t ipcomps_input; /* Input IPcomp packets */ - u_int32_t ipcomps_output; /* Output IPcomp packets */ - u_int32_t ipcomps_invalid;/* Trying to use an invalid TDB */ - u_int64_t ipcomps_ibytes; /* Input bytes */ - u_int64_t ipcomps_obytes; /* Output bytes */ - u_int32_t ipcomps_toobig; /* Packet got > IP_MAXPACKET */ - u_int32_t ipcomps_pdrops; /* Packet blocked due to policy */ - u_int32_t ipcomps_crypto; /* "Crypto" processing failure */ - u_int32_t ipcomps_hist[IPCOMP_ALG_MAX];/* Per-algorithm op count */ - u_int32_t version; /* Version of this structure. */ - u_int32_t ipcomps_threshold; /* Packet < comp. algo. threshold. */ - u_int32_t ipcomps_uncompr; /* Compression was useles. 
*/ + uint64_t ipcomps_hdrops; /* Packet shorter than header shows */ + uint64_t ipcomps_nopf; /* Protocol family not supported */ + uint64_t ipcomps_notdb; + uint64_t ipcomps_badkcr; + uint64_t ipcomps_qfull; + uint64_t ipcomps_noxform; + uint64_t ipcomps_wrap; + uint64_t ipcomps_input; /* Input IPcomp packets */ + uint64_t ipcomps_output; /* Output IPcomp packets */ + uint64_t ipcomps_invalid;/* Trying to use an invalid TDB */ + uint64_t ipcomps_ibytes; /* Input bytes */ + uint64_t ipcomps_obytes; /* Output bytes */ + uint64_t ipcomps_toobig; /* Packet got > IP_MAXPACKET */ + uint64_t ipcomps_pdrops; /* Packet blocked due to policy */ + uint64_t ipcomps_crypto; /* "Crypto" processing failure */ + uint64_t ipcomps_hist[IPCOMP_ALG_MAX];/* Per-algorithm op count */ + uint64_t ipcomps_threshold; /* Packet < comp. algo. threshold. */ + uint64_t ipcomps_uncompr; /* Compression was useles. */ }; #ifdef _KERNEL +#include + VNET_DECLARE(int, ipcomp_enable); -VNET_DECLARE(struct ipcompstat, ipcompstat); +VNET_PCPUSTAT_DECLARE(struct ipcompstat, ipcompstat); -#define IPCOMPSTAT_ADD(name, val) V_ipcompstat.name += (val) +#define IPCOMPSTAT_ADD(name, val) \ + VNET_PCPUSTAT_ADD(struct ipcompstat, ipcompstat, name, (val)) #define IPCOMPSTAT_INC(name) IPCOMPSTAT_ADD(name, 1) #define V_ipcomp_enable VNET(ipcomp_enable) -#define V_ipcompstat VNET(ipcompstat) #endif /* _KERNEL */ #endif /*_NETIPSEC_IPCOMP_VAR_H_*/ diff --git a/freebsd/sys/netipsec/ipip_var.h b/freebsd/sys/netipsec/ipip_var.h deleted file mode 100644 index 415d5c10..00000000 --- a/freebsd/sys/netipsec/ipip_var.h +++ /dev/null @@ -1,70 +0,0 @@ -/* $FreeBSD$ */ -/* $OpenBSD: ip_ipip.h,v 1.5 2002/06/09 16:26:10 itojun Exp $ */ -/*- - * The authors of this code are John Ioannidis (ji@tla.org), - * Angelos D. Keromytis (kermit@csd.uch.gr) and - * Niels Provos (provos@physnet.uni-hamburg.de). - * - * The original version of this code was written by John Ioannidis - * for BSD/OS in Athens, Greece, in November 1995. 
- * - * Ported to OpenBSD and NetBSD, with additional transforms, in December 1996, - * by Angelos D. Keromytis. - * - * Additional transforms and features in 1997 and 1998 by Angelos D. Keromytis - * and Niels Provos. - * - * Additional features in 1999 by Angelos D. Keromytis. - * - * Copyright (C) 1995, 1996, 1997, 1998, 1999 by John Ioannidis, - * Angelos D. Keromytis and Niels Provos. - * Copyright (c) 2001, Angelos D. Keromytis. - * - * Permission to use, copy, and modify this software with or without fee - * is hereby granted, provided that this entire notice is included in - * all copies of any software which is or includes a copy or - * modification of this software. - * You may use this code under the GNU public license if you so wish. Please - * contribute changes back to the authors under this freer than GPL license - * so that we may further the use of strong encryption without limitations to - * all. - * - * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR - * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY - * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE - * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR - * PURPOSE. - */ - -#ifndef _NETINET_IPIP_H_ -#define _NETINET_IPIP_H_ - -/* - * IP-inside-IP processing. - * Not quite all the functionality of RFC-1853, but the main idea is there. 
- */ - -struct ipipstat -{ - u_int32_t ipips_ipackets; /* total input packets */ - u_int32_t ipips_opackets; /* total output packets */ - u_int32_t ipips_hdrops; /* packet shorter than header shows */ - u_int32_t ipips_qfull; - u_int64_t ipips_ibytes; - u_int64_t ipips_obytes; - u_int32_t ipips_pdrops; /* packet dropped due to policy */ - u_int32_t ipips_spoof; /* IP spoofing attempts */ - u_int32_t ipips_family; /* Protocol family mismatch */ - u_int32_t ipips_unspec; /* Missing tunnel endpoint address */ -}; - -#ifdef _KERNEL -VNET_DECLARE(int, ipip_allow); -VNET_DECLARE(struct ipipstat, ipipstat); - -#define IPIPSTAT_ADD(name, val) V_ipipstat.name += (val) -#define IPIPSTAT_INC(name) IPIPSTAT_ADD(name, 1) -#define V_ipip_allow VNET(ipip_allow) -#define V_ipipstat VNET(ipipstat) -#endif /* _KERNEL */ -#endif /* _NETINET_IPIP_H_ */ diff --git a/freebsd/sys/netipsec/ipsec.c b/freebsd/sys/netipsec/ipsec.c index c1eed678..20aad0b6 100644 --- a/freebsd/sys/netipsec/ipsec.c +++ b/freebsd/sys/netipsec/ipsec.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include #include @@ -57,7 +58,8 @@ #include #include -#include +#include +#include #include #include @@ -88,6 +90,7 @@ #include #include /*XXX*/ #include +#include #include #include @@ -99,14 +102,14 @@ #include -#ifdef IPSEC_DEBUG -VNET_DEFINE(int, ipsec_debug) = 1; -#else -VNET_DEFINE(int, ipsec_debug) = 0; -#endif - /* NB: name changed so netstat doesn't use it. */ -VNET_DEFINE(struct ipsecstat, ipsec4stat); +VNET_PCPUSTAT_DEFINE(struct ipsecstat, ipsec4stat); +VNET_PCPUSTAT_SYSINIT(ipsec4stat); + +#ifdef VIMAGE +VNET_PCPUSTAT_SYSUNINIT(ipsec4stat); +#endif /* VIMAGE */ + VNET_DEFINE(int, ip4_ah_offsetmask) = 0; /* maybe IP_DF? */ /* DF bit on encap. 
0: clear 1: set 2: copy */ VNET_DEFINE(int, ip4_ipsec_dfbit) = 0; @@ -114,11 +117,32 @@ VNET_DEFINE(int, ip4_esp_trans_deflev) = IPSEC_LEVEL_USE; VNET_DEFINE(int, ip4_esp_net_deflev) = IPSEC_LEVEL_USE; VNET_DEFINE(int, ip4_ah_trans_deflev) = IPSEC_LEVEL_USE; VNET_DEFINE(int, ip4_ah_net_deflev) = IPSEC_LEVEL_USE; -VNET_DEFINE(struct secpolicy, ip4_def_policy); /* ECN ignore(-1)/forbidden(0)/allowed(1) */ VNET_DEFINE(int, ip4_ipsec_ecn) = 0; VNET_DEFINE(int, ip4_esp_randpad) = -1; +static VNET_DEFINE(int, ip4_filtertunnel) = 0; +#define V_ip4_filtertunnel VNET(ip4_filtertunnel) +static VNET_DEFINE(int, check_policy_history) = 0; +#define V_check_policy_history VNET(check_policy_history) +static VNET_DEFINE(struct secpolicy *, def_policy) = NULL; +#define V_def_policy VNET(def_policy) +static int +sysctl_def_policy(SYSCTL_HANDLER_ARGS) +{ + int error, value; + + value = V_def_policy->policy; + error = sysctl_handle_int(oidp, &value, 0, req); + if (error == 0) { + if (value != IPSEC_POLICY_DISCARD && + value != IPSEC_POLICY_NONE) + return (EINVAL); + V_def_policy->policy = value; + } + return (error); +} + /* * Crypto support requirements: * @@ -127,51 +151,63 @@ VNET_DEFINE(int, ip4_esp_randpad) = -1; * 0 take anything */ VNET_DEFINE(int, crypto_support) = CRYPTOCAP_F_HARDWARE | CRYPTOCAP_F_SOFTWARE; +/* + * TCP/UDP checksum handling policy for transport mode NAT-T (RFC3948) + * + * 0 - auto: incrementally recompute, when checksum delta is known; + * if checksum delta isn't known, reset checksum to zero for UDP, + * and mark csum_flags as valid for TCP. + * 1 - fully recompute TCP/UDP checksum. 
+ */ +VNET_DEFINE(int, natt_cksum_policy) = 0; FEATURE(ipsec, "Internet Protocol Security (IPsec)"); -#ifdef IPSEC_NAT_T FEATURE(ipsec_natt, "UDP Encapsulation of IPsec ESP Packets ('NAT-T')"); -#endif SYSCTL_DECL(_net_inet_ipsec); /* net.inet.ipsec */ -SYSCTL_VNET_INT(_net_inet_ipsec, IPSECCTL_DEF_POLICY, def_policy, - CTLFLAG_RW, &VNET_NAME(ip4_def_policy).policy, 0, +SYSCTL_PROC(_net_inet_ipsec, IPSECCTL_DEF_POLICY, def_policy, + CTLTYPE_INT | CTLFLAG_VNET | CTLFLAG_RW, 0, 0, sysctl_def_policy, "I", "IPsec default policy."); -SYSCTL_VNET_INT(_net_inet_ipsec, IPSECCTL_DEF_ESP_TRANSLEV, esp_trans_deflev, - CTLFLAG_RW, &VNET_NAME(ip4_esp_trans_deflev), 0, +SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DEF_ESP_TRANSLEV, esp_trans_deflev, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip4_esp_trans_deflev), 0, "Default ESP transport mode level"); -SYSCTL_VNET_INT(_net_inet_ipsec, IPSECCTL_DEF_ESP_NETLEV, esp_net_deflev, - CTLFLAG_RW, &VNET_NAME(ip4_esp_net_deflev), 0, +SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DEF_ESP_NETLEV, esp_net_deflev, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip4_esp_net_deflev), 0, "Default ESP tunnel mode level."); -SYSCTL_VNET_INT(_net_inet_ipsec, IPSECCTL_DEF_AH_TRANSLEV, ah_trans_deflev, - CTLFLAG_RW, &VNET_NAME(ip4_ah_trans_deflev), 0, +SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DEF_AH_TRANSLEV, ah_trans_deflev, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip4_ah_trans_deflev), 0, "AH transfer mode default level."); -SYSCTL_VNET_INT(_net_inet_ipsec, IPSECCTL_DEF_AH_NETLEV, ah_net_deflev, - CTLFLAG_RW, &VNET_NAME(ip4_ah_net_deflev), 0, +SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DEF_AH_NETLEV, ah_net_deflev, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip4_ah_net_deflev), 0, "AH tunnel mode default level."); -SYSCTL_VNET_INT(_net_inet_ipsec, IPSECCTL_AH_CLEARTOS, ah_cleartos, - CTLFLAG_RW, &VNET_NAME(ah_cleartos), 0, - "If set clear type-of-service field when doing AH computation."); -SYSCTL_VNET_INT(_net_inet_ipsec, IPSECCTL_AH_OFFSETMASK, ah_offsetmask, - CTLFLAG_RW, 
&VNET_NAME(ip4_ah_offsetmask), 0, - "If not set clear offset field mask when doing AH computation."); -SYSCTL_VNET_INT(_net_inet_ipsec, IPSECCTL_DFBIT, dfbit, - CTLFLAG_RW, &VNET_NAME(ip4_ipsec_dfbit), 0, +SYSCTL_INT(_net_inet_ipsec, IPSECCTL_AH_CLEARTOS, ah_cleartos, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ah_cleartos), 0, + "If set, clear type-of-service field when doing AH computation."); +SYSCTL_INT(_net_inet_ipsec, IPSECCTL_AH_OFFSETMASK, ah_offsetmask, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip4_ah_offsetmask), 0, + "If not set, clear offset field mask when doing AH computation."); +SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DFBIT, dfbit, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip4_ipsec_dfbit), 0, "Do not fragment bit on encap."); -SYSCTL_VNET_INT(_net_inet_ipsec, IPSECCTL_ECN, ecn, - CTLFLAG_RW, &VNET_NAME(ip4_ipsec_ecn), 0, +SYSCTL_INT(_net_inet_ipsec, IPSECCTL_ECN, ecn, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip4_ipsec_ecn), 0, "Explicit Congestion Notification handling."); -SYSCTL_VNET_INT(_net_inet_ipsec, IPSECCTL_DEBUG, debug, - CTLFLAG_RW, &VNET_NAME(ipsec_debug), 0, - "Enable IPsec debugging output when set."); -SYSCTL_VNET_INT(_net_inet_ipsec, OID_AUTO, crypto_support, - CTLFLAG_RW, &VNET_NAME(crypto_support), 0, +SYSCTL_INT(_net_inet_ipsec, OID_AUTO, crypto_support, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(crypto_support), 0, "Crypto driver selection."); -SYSCTL_VNET_STRUCT(_net_inet_ipsec, OID_AUTO, ipsecstats, - CTLFLAG_RD, &VNET_NAME(ipsec4stat), ipsecstat, - "IPsec IPv4 statistics."); +SYSCTL_INT(_net_inet_ipsec, OID_AUTO, check_policy_history, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(check_policy_history), 0, + "Use strict check of inbound packets to security policy compliance."); +SYSCTL_INT(_net_inet_ipsec, OID_AUTO, natt_cksum_policy, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(natt_cksum_policy), 0, + "Method to fix TCP/UDP checksum for transport mode IPsec after NAT."); +SYSCTL_INT(_net_inet_ipsec, OID_AUTO, filtertunnel, + CTLFLAG_VNET | CTLFLAG_RW, 
&VNET_NAME(ip4_filtertunnel), 0, + "If set, filter packets from an IPsec tunnel."); +SYSCTL_VNET_PCPUSTAT(_net_inet_ipsec, OID_AUTO, ipsecstats, struct ipsecstat, + ipsec4stat, "IPsec IPv4 statistics."); #ifdef REGRESSION /* @@ -179,448 +215,293 @@ SYSCTL_VNET_STRUCT(_net_inet_ipsec, OID_AUTO, ipsecstats, * This allows to verify if the other side has proper replay attacks detection. */ VNET_DEFINE(int, ipsec_replay) = 0; -SYSCTL_VNET_INT(_net_inet_ipsec, OID_AUTO, test_replay, - CTLFLAG_RW, &VNET_NAME(ipsec_replay), 0, +SYSCTL_INT(_net_inet_ipsec, OID_AUTO, test_replay, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ipsec_replay), 0, "Emulate replay attack"); /* * When set 1, IPsec will send packets with corrupted HMAC. * This allows to verify if the other side properly detects modified packets. */ VNET_DEFINE(int, ipsec_integrity) = 0; -SYSCTL_VNET_INT(_net_inet_ipsec, OID_AUTO, test_integrity, - CTLFLAG_RW, &VNET_NAME(ipsec_integrity), 0, +SYSCTL_INT(_net_inet_ipsec, OID_AUTO, test_integrity, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ipsec_integrity), 0, "Emulate man-in-the-middle attack"); #endif #ifdef INET6 -VNET_DEFINE(struct ipsecstat, ipsec6stat); +VNET_PCPUSTAT_DEFINE(struct ipsecstat, ipsec6stat); +VNET_PCPUSTAT_SYSINIT(ipsec6stat); + +#ifdef VIMAGE +VNET_PCPUSTAT_SYSUNINIT(ipsec6stat); +#endif /* VIMAGE */ + VNET_DEFINE(int, ip6_esp_trans_deflev) = IPSEC_LEVEL_USE; VNET_DEFINE(int, ip6_esp_net_deflev) = IPSEC_LEVEL_USE; VNET_DEFINE(int, ip6_ah_trans_deflev) = IPSEC_LEVEL_USE; VNET_DEFINE(int, ip6_ah_net_deflev) = IPSEC_LEVEL_USE; VNET_DEFINE(int, ip6_ipsec_ecn) = 0; /* ECN ignore(-1)/forbidden(0)/allowed(1) */ +static VNET_DEFINE(int, ip6_filtertunnel) = 0; +#define V_ip6_filtertunnel VNET(ip6_filtertunnel) + SYSCTL_DECL(_net_inet6_ipsec6); /* net.inet6.ipsec6 */ -#ifdef COMPAT_KAME -SYSCTL_OID(_net_inet6_ipsec6, IPSECCTL_STATS, stats, CTLFLAG_RD, - 0, 0, compat_ipsecstats_sysctl, "S", "IPsec IPv6 statistics."); -#endif /* COMPAT_KAME */ 
-SYSCTL_VNET_INT(_net_inet6_ipsec6, IPSECCTL_DEF_POLICY, def_policy, CTLFLAG_RW, - &VNET_NAME(ip4_def_policy).policy, 0, +SYSCTL_PROC(_net_inet6_ipsec6, IPSECCTL_DEF_POLICY, def_policy, + CTLTYPE_INT | CTLFLAG_VNET | CTLFLAG_RW, 0, 0, sysctl_def_policy, "I", "IPsec default policy."); -SYSCTL_VNET_INT(_net_inet6_ipsec6, IPSECCTL_DEF_ESP_TRANSLEV, - esp_trans_deflev, CTLFLAG_RW, &VNET_NAME(ip6_esp_trans_deflev), 0, +SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_DEF_ESP_TRANSLEV, esp_trans_deflev, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_esp_trans_deflev), 0, "Default ESP transport mode level."); -SYSCTL_VNET_INT(_net_inet6_ipsec6, IPSECCTL_DEF_ESP_NETLEV, - esp_net_deflev, CTLFLAG_RW, &VNET_NAME(ip6_esp_net_deflev), 0, +SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_DEF_ESP_NETLEV, esp_net_deflev, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_esp_net_deflev), 0, "Default ESP tunnel mode level."); -SYSCTL_VNET_INT(_net_inet6_ipsec6, IPSECCTL_DEF_AH_TRANSLEV, - ah_trans_deflev, CTLFLAG_RW, &VNET_NAME(ip6_ah_trans_deflev), 0, +SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_DEF_AH_TRANSLEV, ah_trans_deflev, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_ah_trans_deflev), 0, "AH transfer mode default level."); -SYSCTL_VNET_INT(_net_inet6_ipsec6, IPSECCTL_DEF_AH_NETLEV, - ah_net_deflev, CTLFLAG_RW, &VNET_NAME(ip6_ah_net_deflev), 0, +SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_DEF_AH_NETLEV, ah_net_deflev, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_ah_net_deflev), 0, "AH tunnel mode default level."); -SYSCTL_VNET_INT(_net_inet6_ipsec6, IPSECCTL_ECN, - ecn, CTLFLAG_RW, &VNET_NAME(ip6_ipsec_ecn), 0, +SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_ECN, ecn, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_ipsec_ecn), 0, "Explicit Congestion Notification handling."); -SYSCTL_VNET_INT(_net_inet6_ipsec6, IPSECCTL_DEBUG, debug, CTLFLAG_RW, - &VNET_NAME(ipsec_debug), 0, - "Enable IPsec debugging output when set."); -SYSCTL_VNET_STRUCT(_net_inet6_ipsec6, IPSECCTL_STATS, - ipsecstats, CTLFLAG_RD, &VNET_NAME(ipsec6stat), 
ipsecstat, - "IPsec IPv6 statistics."); +SYSCTL_INT(_net_inet6_ipsec6, OID_AUTO, filtertunnel, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_filtertunnel), 0, + "If set, filter packets from an IPsec tunnel."); +SYSCTL_VNET_PCPUSTAT(_net_inet6_ipsec6, IPSECCTL_STATS, ipsecstats, + struct ipsecstat, ipsec6stat, "IPsec IPv6 statistics."); #endif /* INET6 */ -static int ipsec_setspidx_inpcb __P((struct mbuf *, struct inpcb *)); -static int ipsec_setspidx __P((struct mbuf *, struct secpolicyindex *, int)); -static void ipsec4_get_ulp __P((struct mbuf *m, struct secpolicyindex *, int)); -static int ipsec4_setspidx_ipaddr __P((struct mbuf *, struct secpolicyindex *)); +static int ipsec_in_reject(struct secpolicy *, struct inpcb *, + const struct mbuf *); + +#ifdef INET +static void ipsec4_get_ulp(const struct mbuf *, struct secpolicyindex *, int); +static void ipsec4_setspidx_ipaddr(const struct mbuf *, + struct secpolicyindex *); +#endif #ifdef INET6 -static void ipsec6_get_ulp __P((struct mbuf *m, struct secpolicyindex *, int)); -static int ipsec6_setspidx_ipaddr __P((struct mbuf *, struct secpolicyindex *)); +static void ipsec6_get_ulp(const struct mbuf *m, struct secpolicyindex *, int); +static void ipsec6_setspidx_ipaddr(const struct mbuf *, + struct secpolicyindex *); #endif -static void ipsec_delpcbpolicy __P((struct inpcbpolicy *)); -static struct secpolicy *ipsec_deepcopy_policy __P((struct secpolicy *src)); -static void vshiftl __P((unsigned char *, int, int)); - -MALLOC_DEFINE(M_IPSEC_INPCB, "inpcbpolicy", "inpcb-resident ipsec policy"); /* * Return a held reference to the default SP. 
*/ static struct secpolicy * -key_allocsp_default(const char* where, int tag) +key_allocsp_default(void) { - struct secpolicy *sp; - - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP key_allocsp_default from %s:%u\n", where, tag)); - - sp = &V_ip4_def_policy; - if (sp->policy != IPSEC_POLICY_DISCARD && - sp->policy != IPSEC_POLICY_NONE) { - ipseclog((LOG_INFO, "fixed system default policy: %d->%d\n", - sp->policy, IPSEC_POLICY_NONE)); - sp->policy = IPSEC_POLICY_NONE; - } - key_addref(sp); - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP key_allocsp_default returns SP:%p (%u)\n", - sp, sp->refcnt)); - return (sp); + key_addref(V_def_policy); + return (V_def_policy); } -#define KEY_ALLOCSP_DEFAULT() \ - key_allocsp_default(__FILE__, __LINE__) -/* - * For OUTBOUND packet having a socket. Searching SPD for packet, - * and return a pointer to SP. - * OUT: NULL: no apropreate SP found, the following value is set to error. - * 0 : bypass - * EACCES : discard packet. - * ENOENT : ipsec_acquire() in progress, maybe. - * others : error occured. - * others: a pointer to SP - * - * NOTE: IPv6 mapped adddress concern is implemented here. 
- */ -struct secpolicy * -ipsec_getpolicy(struct tdb_ident *tdbi, u_int dir) +static void +ipsec_invalidate_cache(struct inpcb *inp, u_int dir) { struct secpolicy *sp; - IPSEC_ASSERT(tdbi != NULL, ("null tdbi")); - IPSEC_ASSERT(dir == IPSEC_DIR_INBOUND || dir == IPSEC_DIR_OUTBOUND, - ("invalid direction %u", dir)); - - sp = KEY_ALLOCSP2(tdbi->spi, &tdbi->dst, tdbi->proto, dir); - if (sp == NULL) /*XXX????*/ - sp = KEY_ALLOCSP_DEFAULT(); - IPSEC_ASSERT(sp != NULL, ("null SP")); - return (sp); + INP_WLOCK_ASSERT(inp); + if (dir == IPSEC_DIR_OUTBOUND) { + if (inp->inp_sp->flags & INP_INBOUND_POLICY) + return; + sp = inp->inp_sp->sp_in; + inp->inp_sp->sp_in = NULL; + } else { + if (inp->inp_sp->flags & INP_OUTBOUND_POLICY) + return; + sp = inp->inp_sp->sp_out; + inp->inp_sp->sp_out = NULL; + } + if (sp != NULL) + key_freesp(&sp); /* release extra reference */ } -/* - * For OUTBOUND packet having a socket. Searching SPD for packet, - * and return a pointer to SP. - * OUT: NULL: no apropreate SP found, the following value is set to error. - * 0 : bypass - * EACCES : discard packet. - * ENOENT : ipsec_acquire() in progress, maybe. - * others : error occured. - * others: a pointer to SP - * - * NOTE: IPv6 mapped adddress concern is implemented here. - */ -static struct secpolicy * -ipsec_getpolicybysock(struct mbuf *m, u_int dir, struct inpcb *inp, int *error) +static void +ipsec_cachepolicy(struct inpcb *inp, struct secpolicy *sp, u_int dir) { - struct inpcbpolicy *pcbsp; - struct secpolicy *currsp = NULL; /* Policy on socket. */ - struct secpolicy *sp; - - IPSEC_ASSERT(m != NULL, ("null mbuf")); - IPSEC_ASSERT(inp != NULL, ("null inpcb")); - IPSEC_ASSERT(error != NULL, ("null error")); - IPSEC_ASSERT(dir == IPSEC_DIR_INBOUND || dir == IPSEC_DIR_OUTBOUND, - ("invalid direction %u", dir)); + uint32_t genid; + int downgrade; - /* Set spidx in pcb. 
*/ - *error = ipsec_setspidx_inpcb(m, inp); - if (*error) - return (NULL); + INP_LOCK_ASSERT(inp); - pcbsp = inp->inp_sp; - IPSEC_ASSERT(pcbsp != NULL, ("null pcbsp")); - switch (dir) { - case IPSEC_DIR_INBOUND: - currsp = pcbsp->sp_in; - break; - case IPSEC_DIR_OUTBOUND: - currsp = pcbsp->sp_out; - break; + if (dir == IPSEC_DIR_OUTBOUND) { + /* Do we have configured PCB policy? */ + if (inp->inp_sp->flags & INP_OUTBOUND_POLICY) + return; + /* Another thread has already set cached policy */ + if (inp->inp_sp->sp_out != NULL) + return; + /* + * Do not cache OUTBOUND policy if PCB isn't connected, + * i.e. foreign address is INADDR_ANY/UNSPECIFIED. + */ +#ifdef INET + if ((inp->inp_vflag & INP_IPV4) != 0 && + inp->inp_faddr.s_addr == INADDR_ANY) + return; +#endif +#ifdef INET6 + if ((inp->inp_vflag & INP_IPV6) != 0 && + IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) + return; +#endif + } else { + /* Do we have configured PCB policy? */ + if (inp->inp_sp->flags & INP_INBOUND_POLICY) + return; + /* Another thread has already set cached policy */ + if (inp->inp_sp->sp_in != NULL) + return; + /* + * Do not cache INBOUND policy for listen socket, + * that is bound to INADDR_ANY/UNSPECIFIED address. + */ +#ifdef INET + if ((inp->inp_vflag & INP_IPV4) != 0 && + inp->inp_faddr.s_addr == INADDR_ANY) + return; +#endif +#ifdef INET6 + if ((inp->inp_vflag & INP_IPV6) != 0 && + IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) + return; +#endif } - IPSEC_ASSERT(currsp != NULL, ("null currsp")); - - if (pcbsp->priv) { /* When privilieged socket. */ - switch (currsp->policy) { - case IPSEC_POLICY_BYPASS: - case IPSEC_POLICY_IPSEC: - key_addref(currsp); - sp = currsp; - break; - - case IPSEC_POLICY_ENTRUST: - /* Look for a policy in SPD. */ - sp = KEY_ALLOCSP(&currsp->spidx, dir); - if (sp == NULL) /* No SP found. 
*/ - sp = KEY_ALLOCSP_DEFAULT(); - break; - - default: - ipseclog((LOG_ERR, "%s: Invalid policy for PCB %d\n", - __func__, currsp->policy)); - *error = EINVAL; - return (NULL); - } - } else { /* Unpriv, SPD has policy. */ - sp = KEY_ALLOCSP(&currsp->spidx, dir); - if (sp == NULL) { /* No SP found. */ - switch (currsp->policy) { - case IPSEC_POLICY_BYPASS: - ipseclog((LOG_ERR, "%s: Illegal policy for " - "non-priviliged defined %d\n", - __func__, currsp->policy)); - *error = EINVAL; - return (NULL); - - case IPSEC_POLICY_ENTRUST: - sp = KEY_ALLOCSP_DEFAULT(); - break; - - case IPSEC_POLICY_IPSEC: - key_addref(currsp); - sp = currsp; - break; - - default: - ipseclog((LOG_ERR, "%s: Invalid policy for " - "PCB %d\n", __func__, currsp->policy)); - *error = EINVAL; - return (NULL); - } - } + downgrade = 0; + if (!INP_WLOCKED(inp)) { + if ((downgrade = INP_TRY_UPGRADE(inp)) == 0) + return; } - IPSEC_ASSERT(sp != NULL, - ("null SP (priv %u policy %u", pcbsp->priv, currsp->policy)); - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP %s (priv %u policy %u) allocate SP:%p (refcnt %u)\n", - __func__, pcbsp->priv, currsp->policy, sp, sp->refcnt)); - return (sp); -} - -/* - * For FORWADING packet or OUTBOUND without a socket. Searching SPD for packet, - * and return a pointer to SP. - * OUT: positive: a pointer to the entry for security policy leaf matched. - * NULL: no apropreate SP found, the following value is set to error. - * 0 : bypass - * EACCES : discard packet. - * ENOENT : ipsec_acquire() in progress, maybe. - * others : error occured. - */ -struct secpolicy * -ipsec_getpolicybyaddr(struct mbuf *m, u_int dir, int flag, int *error) -{ - struct secpolicyindex spidx; - struct secpolicy *sp; - - IPSEC_ASSERT(m != NULL, ("null mbuf")); - IPSEC_ASSERT(error != NULL, ("null error")); - IPSEC_ASSERT(dir == IPSEC_DIR_INBOUND || dir == IPSEC_DIR_OUTBOUND, - ("invalid direction %u", dir)); - - sp = NULL; - if (key_havesp(dir)) { - /* Make an index to look for a policy. 
*/ - *error = ipsec_setspidx(m, &spidx, - (flag & IP_FORWARDING) ? 0 : 1); - if (*error != 0) { - DPRINTF(("%s: setpidx failed, dir %u flag %u\n", - __func__, dir, flag)); - return (NULL); - } - spidx.dir = dir; - - sp = KEY_ALLOCSP(&spidx, dir); + if (dir == IPSEC_DIR_OUTBOUND) + inp->inp_sp->sp_out = sp; + else + inp->inp_sp->sp_in = sp; + /* + * SP is already referenced by the lookup code. + * We take extra reference here to avoid race in the + * ipsec_getpcbpolicy() function - SP will not be freed in the + * time between we take SP pointer from the cache and key_addref() + * call. + */ + key_addref(sp); + genid = key_getspgen(); + if (genid != inp->inp_sp->genid) { + ipsec_invalidate_cache(inp, dir); + inp->inp_sp->genid = genid; } - if (sp == NULL) /* No SP found, use system default. */ - sp = KEY_ALLOCSP_DEFAULT(); - IPSEC_ASSERT(sp != NULL, ("null SP")); - return (sp); + KEYDBG(IPSEC_STAMP, + printf("%s: PCB(%p): cached %s SP(%p)\n", + __func__, inp, dir == IPSEC_DIR_OUTBOUND ? "OUTBOUND": + "INBOUND", sp)); + if (downgrade != 0) + INP_DOWNGRADE(inp); } -struct secpolicy * -ipsec4_checkpolicy(struct mbuf *m, u_int dir, u_int flag, int *error, - struct inpcb *inp) +static struct secpolicy * +ipsec_checkpolicy(struct secpolicy *sp, struct inpcb *inp, int *error) { - struct secpolicy *sp; - *error = 0; - if (inp == NULL) - sp = ipsec_getpolicybyaddr(m, dir, flag, error); - else - sp = ipsec_getpolicybysock(m, dir, inp, error); - if (sp == NULL) { - IPSEC_ASSERT(*error != 0, ("getpolicy failed w/o error")); - IPSECSTAT_INC(ips_out_inval); - return (NULL); - } - IPSEC_ASSERT(*error == 0, ("sp w/ error set to %u", *error)); + /* Save found OUTBOUND policy into PCB SP cache. 
*/ + if (inp != NULL && inp->inp_sp != NULL && inp->inp_sp->sp_out == NULL) + ipsec_cachepolicy(inp, sp, IPSEC_DIR_OUTBOUND); + switch (sp->policy) { - case IPSEC_POLICY_ENTRUST: default: printf("%s: invalid policy %u\n", __func__, sp->policy); /* FALLTHROUGH */ case IPSEC_POLICY_DISCARD: - IPSECSTAT_INC(ips_out_polvio); *error = -EINVAL; /* Packet is discarded by caller. */ - break; + /* FALLTHROUGH */ case IPSEC_POLICY_BYPASS: case IPSEC_POLICY_NONE: - KEY_FREESP(&sp); + key_freesp(&sp); sp = NULL; /* NB: force NULL result. */ break; case IPSEC_POLICY_IPSEC: - if (sp->req == NULL) /* Acquire a SA. */ - *error = key_spdacquire(sp); + /* XXXAE: handle LARVAL SP */ break; } - if (*error != 0) { - KEY_FREESP(&sp); - sp = NULL; - } + KEYDBG(IPSEC_DUMP, + printf("%s: get SP(%p), error %d\n", __func__, sp, *error)); return (sp); } -static int -ipsec_setspidx_inpcb(struct mbuf *m, struct inpcb *inp) +static struct secpolicy * +ipsec_getpcbpolicy(struct inpcb *inp, u_int dir) { - int error; + struct secpolicy *sp; + int flags, downgrade; + + if (inp == NULL || inp->inp_sp == NULL) + return (NULL); - IPSEC_ASSERT(inp != NULL, ("null inp")); - IPSEC_ASSERT(inp->inp_sp != NULL, ("null inp_sp")); - IPSEC_ASSERT(inp->inp_sp->sp_out != NULL && inp->inp_sp->sp_in != NULL, - ("null sp_in || sp_out")); + INP_LOCK_ASSERT(inp); - error = ipsec_setspidx(m, &inp->inp_sp->sp_in->spidx, 1); - if (error == 0) { - inp->inp_sp->sp_in->spidx.dir = IPSEC_DIR_INBOUND; - inp->inp_sp->sp_out->spidx = inp->inp_sp->sp_in->spidx; - inp->inp_sp->sp_out->spidx.dir = IPSEC_DIR_OUTBOUND; + flags = inp->inp_sp->flags; + if (dir == IPSEC_DIR_OUTBOUND) { + sp = inp->inp_sp->sp_out; + flags &= INP_OUTBOUND_POLICY; } else { - bzero(&inp->inp_sp->sp_in->spidx, - sizeof (inp->inp_sp->sp_in->spidx)); - bzero(&inp->inp_sp->sp_out->spidx, - sizeof (inp->inp_sp->sp_in->spidx)); + sp = inp->inp_sp->sp_in; + flags &= INP_INBOUND_POLICY; } - return (error); -} - -/* - * Configure security policy index 
(src/dst/proto/sport/dport) - * by looking at the content of mbuf. - * The caller is responsible for error recovery (like clearing up spidx). - */ -static int -ipsec_setspidx(struct mbuf *m, struct secpolicyindex *spidx, int needport) -{ - struct ip *ip = NULL; - struct ip ipbuf; - u_int v; - struct mbuf *n; - int len; - int error; - - IPSEC_ASSERT(m != NULL, ("null mbuf")); - /* - * Validate m->m_pkthdr.len. We see incorrect length if we - * mistakenly call this function with inconsistent mbuf chain - * (like 4.4BSD tcp/udp processing). XXX Should we panic here? + * Check flags. If we have PCB SP, just return it. + * Otherwise we need to check that cached SP entry isn't stale. */ - len = 0; - for (n = m; n; n = n->m_next) - len += n->m_len; - if (m->m_pkthdr.len != len) { - KEYDEBUG(KEYDEBUG_IPSEC_DUMP, - printf("%s: pkthdr len(%d) mismatch (%d), ignored.\n", - __func__, len, m->m_pkthdr.len)); - return (EINVAL); - } - - if (m->m_pkthdr.len < sizeof(struct ip)) { - KEYDEBUG(KEYDEBUG_IPSEC_DUMP, - printf("%s: pkthdr len(%d) too small (v4), ignored.\n", - __func__, m->m_pkthdr.len)); - return (EINVAL); - } - - if (m->m_len >= sizeof(*ip)) - ip = mtod(m, struct ip *); - else { - m_copydata(m, 0, sizeof(ipbuf), (caddr_t)&ipbuf); - ip = &ipbuf; - } -#ifdef _IP_VHL - v = _IP_VHL_V(ip->ip_vhl); -#else - v = ip->ip_v; -#endif - switch (v) { - case 4: - error = ipsec4_setspidx_ipaddr(m, spidx); - if (error) - return (error); - ipsec4_get_ulp(m, spidx, needport); - return (0); -#ifdef INET6 - case 6: - if (m->m_pkthdr.len < sizeof(struct ip6_hdr)) { - KEYDEBUG(KEYDEBUG_IPSEC_DUMP, - printf("%s: pkthdr len(%d) too small (v6), " - "ignored\n", __func__, m->m_pkthdr.len)); - return (EINVAL); + if (flags == 0) { + if (sp == NULL) + return (NULL); + if (inp->inp_sp->genid != key_getspgen()) { + /* Invalidate the cache. 
*/ + downgrade = 0; + if (!INP_WLOCKED(inp)) { + if ((downgrade = INP_TRY_UPGRADE(inp)) == 0) + return (NULL); + } + ipsec_invalidate_cache(inp, IPSEC_DIR_OUTBOUND); + ipsec_invalidate_cache(inp, IPSEC_DIR_INBOUND); + if (downgrade != 0) + INP_DOWNGRADE(inp); + return (NULL); } - error = ipsec6_setspidx_ipaddr(m, spidx); - if (error) - return (error); - ipsec6_get_ulp(m, spidx, needport); - return (0); -#endif - default: - KEYDEBUG(KEYDEBUG_IPSEC_DUMP, - printf("%s: " "unknown IP version %u, ignored.\n", - __func__, v)); - return (EINVAL); + KEYDBG(IPSEC_STAMP, + printf("%s: PCB(%p): cache hit SP(%p)\n", + __func__, inp, sp)); + /* Return referenced cached policy */ } + key_addref(sp); + return (sp); } +#ifdef INET static void -ipsec4_get_ulp(struct mbuf *m, struct secpolicyindex *spidx, int needport) +ipsec4_get_ulp(const struct mbuf *m, struct secpolicyindex *spidx, + int needport) { - u_int8_t nxt; + uint8_t nxt; int off; /* Sanity check. */ - IPSEC_ASSERT(m != NULL, ("null mbuf")); - IPSEC_ASSERT(m->m_pkthdr.len >= sizeof(struct ip),("packet too short")); + IPSEC_ASSERT(m->m_pkthdr.len >= sizeof(struct ip), + ("packet too short")); - /* NB: ip_input() flips it into host endian. XXX Need more checking. 
*/ if (m->m_len >= sizeof (struct ip)) { - struct ip *ip = mtod(m, struct ip *); - if (ip->ip_off & (IP_MF | IP_OFFMASK)) + const struct ip *ip = mtod(m, const struct ip *); + if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) goto done; -#ifdef _IP_VHL - off = _IP_VHL_HL(ip->ip_vhl) << 2; -#else off = ip->ip_hl << 2; -#endif nxt = ip->ip_p; } else { struct ip ih; m_copydata(m, 0, sizeof (struct ip), (caddr_t) &ih); - if (ih.ip_off & (IP_MF | IP_OFFMASK)) + if (ih.ip_off & htons(IP_MF | IP_OFFMASK)) goto done; -#ifdef _IP_VHL - off = _IP_VHL_HL(ih.ip_vhl) << 2; -#else off = ih.ip_hl << 2; -#endif nxt = ih.ip_p; } @@ -670,60 +551,134 @@ done: done_proto: spidx->src.sin.sin_port = IPSEC_PORT_ANY; spidx->dst.sin.sin_port = IPSEC_PORT_ANY; + KEYDBG(IPSEC_DUMP, + printf("%s: ", __func__); kdebug_secpolicyindex(spidx, NULL)); } -/* Assumes that m is sane. */ -static int -ipsec4_setspidx_ipaddr(struct mbuf *m, struct secpolicyindex *spidx) +static void +ipsec4_setspidx_ipaddr(const struct mbuf *m, struct secpolicyindex *spidx) { - static const struct sockaddr_in template = { - sizeof (struct sockaddr_in), - AF_INET, - 0, { 0 }, { 0, 0, 0, 0, 0, 0, 0, 0 } - }; - spidx->src.sin = template; - spidx->dst.sin = template; + ipsec4_setsockaddrs(m, &spidx->src, &spidx->dst); + spidx->prefs = sizeof(struct in_addr) << 3; + spidx->prefd = sizeof(struct in_addr) << 3; +} - if (m->m_len < sizeof (struct ip)) { - m_copydata(m, offsetof(struct ip, ip_src), - sizeof (struct in_addr), - (caddr_t) &spidx->src.sin.sin_addr); - m_copydata(m, offsetof(struct ip, ip_dst), - sizeof (struct in_addr), - (caddr_t) &spidx->dst.sin.sin_addr); - } else { - struct ip *ip = mtod(m, struct ip *); - spidx->src.sin.sin_addr = ip->ip_src; - spidx->dst.sin.sin_addr = ip->ip_dst; +static struct secpolicy * +ipsec4_getpolicy(const struct mbuf *m, struct inpcb *inp, u_int dir) +{ + struct secpolicyindex spidx; + struct secpolicy *sp; + + sp = ipsec_getpcbpolicy(inp, dir); + if (sp == NULL && key_havesp(dir)) { + /* 
Make an index to look for a policy. */ + ipsec4_setspidx_ipaddr(m, &spidx); + /* Fill ports in spidx if we have inpcb. */ + ipsec4_get_ulp(m, &spidx, inp != NULL); + spidx.dir = dir; + sp = key_allocsp(&spidx, dir); } + if (sp == NULL) /* No SP found, use system default. */ + sp = key_allocsp_default(); + return (sp); +} - spidx->prefs = sizeof(struct in_addr) << 3; - spidx->prefd = sizeof(struct in_addr) << 3; +/* + * Check security policy for *OUTBOUND* IPv4 packet. + */ +struct secpolicy * +ipsec4_checkpolicy(const struct mbuf *m, struct inpcb *inp, int *error) +{ + struct secpolicy *sp; - return (0); + *error = 0; + sp = ipsec4_getpolicy(m, inp, IPSEC_DIR_OUTBOUND); + if (sp != NULL) + sp = ipsec_checkpolicy(sp, inp, error); + if (sp == NULL) { + switch (*error) { + case 0: /* No IPsec required: BYPASS or NONE */ + break; + case -EINVAL: + IPSECSTAT_INC(ips_out_polvio); + break; + default: + IPSECSTAT_INC(ips_out_inval); + } + } + KEYDBG(IPSEC_STAMP, + printf("%s: using SP(%p), error %d\n", __func__, sp, *error)); + if (sp != NULL) + KEYDBG(IPSEC_DATA, kdebug_secpolicy(sp)); + return (sp); } +/* + * Check IPv4 packet against *INBOUND* security policy. + * This function is called from tcp_input(), udp_input(), + * rip_input() and sctp_input(). + */ +int +ipsec4_in_reject(const struct mbuf *m, struct inpcb *inp) +{ + struct secpolicy *sp; + int result; + + sp = ipsec4_getpolicy(m, inp, IPSEC_DIR_INBOUND); + result = ipsec_in_reject(sp, inp, m); + key_freesp(&sp); + if (result != 0) + IPSECSTAT_INC(ips_in_polvio); + return (result); +} + +/* + * IPSEC_CAP() method implementation for IPv4. + */ +int +ipsec4_capability(struct mbuf *m, u_int cap) +{ + + switch (cap) { + case IPSEC_CAP_BYPASS_FILTER: + /* + * Bypass packet filtering for packets previously handled + * by IPsec. + */ + if (!V_ip4_filtertunnel && + m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL) + return (1); + return (0); + case IPSEC_CAP_OPERABLE: + /* Do we have active security policies? 
*/ + if (key_havesp(IPSEC_DIR_INBOUND) != 0 || + key_havesp(IPSEC_DIR_OUTBOUND) != 0) + return (1); + return (0); + }; + return (EOPNOTSUPP); +} + +#endif /* INET */ + #ifdef INET6 static void -ipsec6_get_ulp(struct mbuf *m, struct secpolicyindex *spidx, int needport) +ipsec6_get_ulp(const struct mbuf *m, struct secpolicyindex *spidx, + int needport) { - int off, nxt; struct tcphdr th; struct udphdr uh; struct icmp6_hdr ih; + int off, nxt; - /* Sanity check. */ - if (m == NULL) - panic("%s: NULL pointer was passed.\n", __func__); - - KEYDEBUG(KEYDEBUG_IPSEC_DUMP, - printf("%s:\n", __func__); kdebug_mbuf(m)); + IPSEC_ASSERT(m->m_pkthdr.len >= sizeof(struct ip6_hdr), + ("packet too short")); /* Set default. */ spidx->ul_proto = IPSEC_ULPROTO_ANY; - ((struct sockaddr_in6 *)&spidx->src)->sin6_port = IPSEC_PORT_ANY; - ((struct sockaddr_in6 *)&spidx->dst)->sin6_port = IPSEC_PORT_ANY; + spidx->src.sin6.sin6_port = IPSEC_PORT_ANY; + spidx->dst.sin6.sin6_port = IPSEC_PORT_ANY; nxt = -1; off = ip6_lasthdr(m, 0, IPPROTO_IPV6, &nxt); @@ -738,8 +693,8 @@ ipsec6_get_ulp(struct mbuf *m, struct secpolicyindex *spidx, int needport) if (off + sizeof(struct tcphdr) > m->m_pkthdr.len) break; m_copydata(m, off, sizeof(th), (caddr_t)&th); - ((struct sockaddr_in6 *)&spidx->src)->sin6_port = th.th_sport; - ((struct sockaddr_in6 *)&spidx->dst)->sin6_port = th.th_dport; + spidx->src.sin6.sin6_port = th.th_sport; + spidx->dst.sin6.sin6_port = th.th_dport; break; case IPPROTO_UDP: spidx->ul_proto = nxt; @@ -748,355 +703,157 @@ ipsec6_get_ulp(struct mbuf *m, struct secpolicyindex *spidx, int needport) if (off + sizeof(struct udphdr) > m->m_pkthdr.len) break; m_copydata(m, off, sizeof(uh), (caddr_t)&uh); - ((struct sockaddr_in6 *)&spidx->src)->sin6_port = uh.uh_sport; - ((struct sockaddr_in6 *)&spidx->dst)->sin6_port = uh.uh_dport; + spidx->src.sin6.sin6_port = uh.uh_sport; + spidx->dst.sin6.sin6_port = uh.uh_dport; break; case IPPROTO_ICMPV6: spidx->ul_proto = nxt; if (off + sizeof(struct 
icmp6_hdr) > m->m_pkthdr.len) break; m_copydata(m, off, sizeof(ih), (caddr_t)&ih); - ((struct sockaddr_in6 *)&spidx->src)->sin6_port = - htons((uint16_t)ih.icmp6_type); - ((struct sockaddr_in6 *)&spidx->dst)->sin6_port = - htons((uint16_t)ih.icmp6_code); + spidx->src.sin6.sin6_port = htons((uint16_t)ih.icmp6_type); + spidx->dst.sin6.sin6_port = htons((uint16_t)ih.icmp6_code); break; default: /* XXX Intermediate headers??? */ spidx->ul_proto = nxt; break; } + KEYDBG(IPSEC_DUMP, + printf("%s: ", __func__); kdebug_secpolicyindex(spidx, NULL)); } -/* Assumes that m is sane. */ -static int -ipsec6_setspidx_ipaddr(struct mbuf *m, struct secpolicyindex *spidx) +static void +ipsec6_setspidx_ipaddr(const struct mbuf *m, struct secpolicyindex *spidx) { - struct ip6_hdr *ip6 = NULL; - struct ip6_hdr ip6buf; - struct sockaddr_in6 *sin6; - - if (m->m_len >= sizeof(*ip6)) - ip6 = mtod(m, struct ip6_hdr *); - else { - m_copydata(m, 0, sizeof(ip6buf), (caddr_t)&ip6buf); - ip6 = &ip6buf; - } - sin6 = (struct sockaddr_in6 *)&spidx->src; - bzero(sin6, sizeof(*sin6)); - sin6->sin6_family = AF_INET6; - sin6->sin6_len = sizeof(struct sockaddr_in6); - bcopy(&ip6->ip6_src, &sin6->sin6_addr, sizeof(ip6->ip6_src)); - if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) { - sin6->sin6_addr.s6_addr16[1] = 0; - sin6->sin6_scope_id = ntohs(ip6->ip6_src.s6_addr16[1]); - } + ipsec6_setsockaddrs(m, &spidx->src, &spidx->dst); spidx->prefs = sizeof(struct in6_addr) << 3; - - sin6 = (struct sockaddr_in6 *)&spidx->dst; - bzero(sin6, sizeof(*sin6)); - sin6->sin6_family = AF_INET6; - sin6->sin6_len = sizeof(struct sockaddr_in6); - bcopy(&ip6->ip6_dst, &sin6->sin6_addr, sizeof(ip6->ip6_dst)); - if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst)) { - sin6->sin6_addr.s6_addr16[1] = 0; - sin6->sin6_scope_id = ntohs(ip6->ip6_dst.s6_addr16[1]); - } spidx->prefd = sizeof(struct in6_addr) << 3; - - return (0); } -#endif -static void -ipsec_delpcbpolicy(struct inpcbpolicy *p) -{ - - free(p, M_IPSEC_INPCB); -} - -/* Initialize 
policy in PCB. */ -int -ipsec_init_policy(struct socket *so, struct inpcbpolicy **pcb_sp) +static struct secpolicy * +ipsec6_getpolicy(const struct mbuf *m, struct inpcb *inp, u_int dir) { - struct inpcbpolicy *new; - - /* Sanity check. */ - if (so == NULL || pcb_sp == NULL) - panic("%s: NULL pointer was passed.\n", __func__); - - new = (struct inpcbpolicy *) malloc(sizeof(struct inpcbpolicy), - M_IPSEC_INPCB, M_NOWAIT|M_ZERO); - if (new == NULL) { - ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); - return (ENOBUFS); - } - - new->priv = IPSEC_IS_PRIVILEGED_SO(so); - - if ((new->sp_in = KEY_NEWSP()) == NULL) { - ipsec_delpcbpolicy(new); - return (ENOBUFS); - } - new->sp_in->state = IPSEC_SPSTATE_ALIVE; - new->sp_in->policy = IPSEC_POLICY_ENTRUST; + struct secpolicyindex spidx; + struct secpolicy *sp; - if ((new->sp_out = KEY_NEWSP()) == NULL) { - KEY_FREESP(&new->sp_in); - ipsec_delpcbpolicy(new); - return (ENOBUFS); + sp = ipsec_getpcbpolicy(inp, dir); + if (sp == NULL && key_havesp(dir)) { + /* Make an index to look for a policy. */ + ipsec6_setspidx_ipaddr(m, &spidx); + /* Fill ports in spidx if we have inpcb. */ + ipsec6_get_ulp(m, &spidx, inp != NULL); + spidx.dir = dir; + sp = key_allocsp(&spidx, dir); } - new->sp_out->state = IPSEC_SPSTATE_ALIVE; - new->sp_out->policy = IPSEC_POLICY_ENTRUST; - - *pcb_sp = new; - - return (0); + if (sp == NULL) /* No SP found, use system default. */ + sp = key_allocsp_default(); + return (sp); } -/* Copy old IPsec policy into new. */ -int -ipsec_copy_policy(struct inpcbpolicy *old, struct inpcbpolicy *new) +/* + * Check security policy for *OUTBOUND* IPv6 packet. 
+ */ +struct secpolicy * +ipsec6_checkpolicy(const struct mbuf *m, struct inpcb *inp, int *error) { struct secpolicy *sp; - sp = ipsec_deepcopy_policy(old->sp_in); - if (sp) { - KEY_FREESP(&new->sp_in); - new->sp_in = sp; - } else - return (ENOBUFS); - - sp = ipsec_deepcopy_policy(old->sp_out); - if (sp) { - KEY_FREESP(&new->sp_out); - new->sp_out = sp; - } else - return (ENOBUFS); - - new->priv = old->priv; - - return (0); -} - -struct ipsecrequest * -ipsec_newisr(void) -{ - struct ipsecrequest *p; - - p = malloc(sizeof(struct ipsecrequest), M_IPSEC_SR, M_NOWAIT|M_ZERO); - if (p != NULL) - IPSECREQUEST_LOCK_INIT(p); - return (p); -} - -void -ipsec_delisr(struct ipsecrequest *p) -{ - - IPSECREQUEST_LOCK_DESTROY(p); - free(p, M_IPSEC_SR); -} - -/* Deep-copy a policy in PCB. */ -static struct secpolicy * -ipsec_deepcopy_policy(struct secpolicy *src) -{ - struct ipsecrequest *newchain = NULL; - struct ipsecrequest *p; - struct ipsecrequest **q; - struct ipsecrequest *r; - struct secpolicy *dst; - - if (src == NULL) - return (NULL); - dst = KEY_NEWSP(); - if (dst == NULL) - return (NULL); - - /* - * Deep-copy IPsec request chain. This is required since struct - * ipsecrequest is not reference counted. - */ - q = &newchain; - for (p = src->req; p; p = p->next) { - *q = ipsec_newisr(); - if (*q == NULL) - goto fail; - (*q)->saidx.proto = p->saidx.proto; - (*q)->saidx.mode = p->saidx.mode; - (*q)->level = p->level; - (*q)->saidx.reqid = p->saidx.reqid; - - bcopy(&p->saidx.src, &(*q)->saidx.src, sizeof((*q)->saidx.src)); - bcopy(&p->saidx.dst, &(*q)->saidx.dst, sizeof((*q)->saidx.dst)); - - (*q)->sp = dst; - - q = &((*q)->next); - } - - dst->req = newchain; - dst->state = src->state; - dst->policy = src->policy; - /* Do not touch the refcnt fields. 
*/ - - return (dst); - -fail: - for (p = newchain; p; p = r) { - r = p->next; - ipsec_delisr(p); - p = NULL; + *error = 0; + sp = ipsec6_getpolicy(m, inp, IPSEC_DIR_OUTBOUND); + if (sp != NULL) + sp = ipsec_checkpolicy(sp, inp, error); + if (sp == NULL) { + switch (*error) { + case 0: /* No IPsec required: BYPASS or NONE */ + break; + case -EINVAL: + IPSEC6STAT_INC(ips_out_polvio); + break; + default: + IPSEC6STAT_INC(ips_out_inval); + } } - return (NULL); + KEYDBG(IPSEC_STAMP, + printf("%s: using SP(%p), error %d\n", __func__, sp, *error)); + if (sp != NULL) + KEYDBG(IPSEC_DATA, kdebug_secpolicy(sp)); + return (sp); } -/* Set policy and IPsec request if present. */ -static int -ipsec_set_policy_internal(struct secpolicy **pcb_sp, int optname, - caddr_t request, size_t len, struct ucred *cred) +/* + * Check IPv6 packet against inbound security policy. + * This function is called from tcp6_input(), udp6_input(), + * rip6_input() and sctp_input(). + */ +int +ipsec6_in_reject(const struct mbuf *m, struct inpcb *inp) { - struct sadb_x_policy *xpl; - struct secpolicy *newsp = NULL; - int error; - - /* Sanity check. */ - if (pcb_sp == NULL || *pcb_sp == NULL || request == NULL) - return (EINVAL); - if (len < sizeof(*xpl)) - return (EINVAL); - xpl = (struct sadb_x_policy *)request; - - KEYDEBUG(KEYDEBUG_IPSEC_DUMP, - printf("%s: passed policy\n", __func__); - kdebug_sadb_x_policy((struct sadb_ext *)xpl)); - - /* Check policy type. */ - /* ipsec_set_policy_internal() accepts IPSEC, ENTRUST and BYPASS. */ - if (xpl->sadb_x_policy_type == IPSEC_POLICY_DISCARD - || xpl->sadb_x_policy_type == IPSEC_POLICY_NONE) - return (EINVAL); - - /* Check privileged socket. */ - if (cred != NULL && xpl->sadb_x_policy_type == IPSEC_POLICY_BYPASS) { - error = priv_check_cred(cred, PRIV_NETINET_IPSEC, 0); - if (error) - return (EACCES); - } - - /* Allocating new SP entry. 
*/ - if ((newsp = key_msg2sp(xpl, len, &error)) == NULL) - return (error); - - newsp->state = IPSEC_SPSTATE_ALIVE; - - /* Clear old SP and set new SP. */ - KEY_FREESP(pcb_sp); - *pcb_sp = newsp; - KEYDEBUG(KEYDEBUG_IPSEC_DUMP, - printf("%s: new policy\n", __func__); - kdebug_secpolicy(newsp)); + struct secpolicy *sp; + int result; - return (0); + sp = ipsec6_getpolicy(m, inp, IPSEC_DIR_INBOUND); + result = ipsec_in_reject(sp, inp, m); + key_freesp(&sp); + if (result) + IPSEC6STAT_INC(ips_in_polvio); + return (result); } +/* + * IPSEC_CAP() method implementation for IPv6. + */ int -ipsec_set_policy(struct inpcb *inp, int optname, caddr_t request, - size_t len, struct ucred *cred) +ipsec6_capability(struct mbuf *m, u_int cap) { - struct sadb_x_policy *xpl; - struct secpolicy **pcb_sp; - - /* Sanity check. */ - if (inp == NULL || request == NULL) - return (EINVAL); - if (len < sizeof(*xpl)) - return (EINVAL); - xpl = (struct sadb_x_policy *)request; - - /* Select direction. */ - switch (xpl->sadb_x_policy_dir) { - case IPSEC_DIR_INBOUND: - pcb_sp = &inp->inp_sp->sp_in; - break; - case IPSEC_DIR_OUTBOUND: - pcb_sp = &inp->inp_sp->sp_out; - break; - default: - ipseclog((LOG_ERR, "%s: invalid direction=%u\n", __func__, - xpl->sadb_x_policy_dir)); - return (EINVAL); - } - return (ipsec_set_policy_internal(pcb_sp, optname, request, len, cred)); + switch (cap) { + case IPSEC_CAP_BYPASS_FILTER: + /* + * Bypass packet filtering for packets previously handled + * by IPsec. + */ + if (!V_ip6_filtertunnel && + m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL) + return (1); + return (0); + case IPSEC_CAP_OPERABLE: + /* Do we have active security policies? 
*/ + if (key_havesp(IPSEC_DIR_INBOUND) != 0 || + key_havesp(IPSEC_DIR_OUTBOUND) != 0) + return (1); + return (0); + }; + return (EOPNOTSUPP); } +#endif /* INET6 */ int -ipsec_get_policy(struct inpcb *inp, caddr_t request, size_t len, - struct mbuf **mp) +ipsec_run_hhooks(struct ipsec_ctx_data *ctx, int type) { - struct sadb_x_policy *xpl; - struct secpolicy *pcb_sp; + int idx; - /* Sanity check. */ - if (inp == NULL || request == NULL || mp == NULL) - return (EINVAL); - IPSEC_ASSERT(inp->inp_sp != NULL, ("null inp_sp")); - if (len < sizeof(*xpl)) - return (EINVAL); - xpl = (struct sadb_x_policy *)request; - - /* Select direction. */ - switch (xpl->sadb_x_policy_dir) { - case IPSEC_DIR_INBOUND: - pcb_sp = inp->inp_sp->sp_in; + switch (ctx->af) { +#ifdef INET + case AF_INET: + idx = HHOOK_IPSEC_INET; break; - case IPSEC_DIR_OUTBOUND: - pcb_sp = inp->inp_sp->sp_out; +#endif +#ifdef INET6 + case AF_INET6: + idx = HHOOK_IPSEC_INET6; break; +#endif default: - ipseclog((LOG_ERR, "%s: invalid direction=%u\n", __func__, - xpl->sadb_x_policy_dir)); - return (EINVAL); - } - - /* Sanity check. Should be an IPSEC_ASSERT. */ - if (pcb_sp == NULL) - return (EINVAL); - - *mp = key_sp2msg(pcb_sp); - if (!*mp) { - ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); - return (ENOBUFS); + return (EPFNOSUPPORT); } - - (*mp)->m_type = MT_DATA; - KEYDEBUG(KEYDEBUG_IPSEC_DUMP, - printf("%s:\n", __func__); kdebug_mbuf(*mp)); - - return (0); -} - -/* Delete policy in PCB. 
*/ -int -ipsec_delete_pcbpolicy(struct inpcb *inp) -{ - IPSEC_ASSERT(inp != NULL, ("null inp")); - - if (inp->inp_sp == NULL) - return (0); - - if (inp->inp_sp->sp_in != NULL) - KEY_FREESP(&inp->inp_sp->sp_in); - - if (inp->inp_sp->sp_out != NULL) - KEY_FREESP(&inp->inp_sp->sp_out); - - ipsec_delpcbpolicy(inp->inp_sp); - inp->inp_sp = NULL; - + if (type == HHOOK_TYPE_IPSEC_IN) + HHOOKS_RUN_IF(V_ipsec_hhh_in[idx], ctx, NULL); + else + HHOOKS_RUN_IF(V_ipsec_hhh_out[idx], ctx, NULL); + if (*ctx->mp == NULL) + return (EACCES); return (0); } @@ -1105,32 +862,36 @@ ipsec_delete_pcbpolicy(struct inpcb *inp) * Either IPSEC_LEVEL_USE or IPSEC_LEVEL_REQUIRE are always returned. */ u_int -ipsec_get_reqlevel(struct ipsecrequest *isr) +ipsec_get_reqlevel(struct secpolicy *sp, u_int idx) { - u_int level = 0; + struct ipsecrequest *isr; u_int esp_trans_deflev, esp_net_deflev; u_int ah_trans_deflev, ah_net_deflev; + u_int level = 0; - IPSEC_ASSERT(isr != NULL && isr->sp != NULL, ("null argument")); - IPSEC_ASSERT(isr->sp->spidx.src.sa.sa_family == isr->sp->spidx.dst.sa.sa_family, - ("af family mismatch, src %u, dst %u", - isr->sp->spidx.src.sa.sa_family, - isr->sp->spidx.dst.sa.sa_family)); - + IPSEC_ASSERT(idx < sp->tcount, ("Wrong IPsec request index %d", idx)); /* XXX Note that we have ipseclog() expanded here - code sync issue. */ #define IPSEC_CHECK_DEFAULT(lev) \ - (((lev) != IPSEC_LEVEL_USE && (lev) != IPSEC_LEVEL_REQUIRE \ - && (lev) != IPSEC_LEVEL_UNIQUE) \ - ? (V_ipsec_debug \ - ? log(LOG_INFO, "fixed system default level " #lev ":%d->%d\n",\ - (lev), IPSEC_LEVEL_REQUIRE) \ - : 0), \ - (lev) = IPSEC_LEVEL_REQUIRE, \ - (lev) \ - : (lev)) + (((lev) != IPSEC_LEVEL_USE && (lev) != IPSEC_LEVEL_REQUIRE && \ + (lev) != IPSEC_LEVEL_UNIQUE) \ + ? (V_ipsec_debug ? 
\ + log(LOG_INFO, "fixed system default level " #lev ":%d->%d\n",\ + (lev), IPSEC_LEVEL_REQUIRE) : 0), \ + (lev) = IPSEC_LEVEL_REQUIRE, (lev) : (lev)) + + /* + * IPsec VTI uses unique security policy with fake spidx filled + * with zeroes. Just return IPSEC_LEVEL_REQUIRE instead of doing + * full level lookup for such policies. + */ + if (sp->state == IPSEC_SPSTATE_IFNET) { + IPSEC_ASSERT(sp->req[idx]->level == IPSEC_LEVEL_UNIQUE, + ("Wrong IPsec request level %d", sp->req[idx]->level)); + return (IPSEC_LEVEL_REQUIRE); + } /* Set default level. */ - switch (((struct sockaddr *)&isr->sp->spidx.src)->sa_family) { + switch (sp->spidx.src.sa.sa_family) { #ifdef INET case AF_INET: esp_trans_deflev = IPSEC_CHECK_DEFAULT(V_ip4_esp_trans_deflev); @@ -1149,11 +910,12 @@ ipsec_get_reqlevel(struct ipsecrequest *isr) #endif /* INET6 */ default: panic("%s: unknown af %u", - __func__, isr->sp->spidx.src.sa.sa_family); + __func__, sp->spidx.src.sa.sa_family); } #undef IPSEC_CHECK_DEFAULT + isr = sp->req[idx]; /* Set level. */ switch (isr->level) { case IPSEC_LEVEL_DEFAULT: @@ -1198,6 +960,45 @@ ipsec_get_reqlevel(struct ipsecrequest *isr) return (level); } +static int +ipsec_check_history(const struct mbuf *m, struct secpolicy *sp, u_int idx) +{ + struct xform_history *xh; + struct m_tag *mtag; + + mtag = NULL; + while ((mtag = m_tag_find(__DECONST(struct mbuf *, m), + PACKET_TAG_IPSEC_IN_DONE, mtag)) != NULL) { + xh = (struct xform_history *)(mtag + 1); + KEYDBG(IPSEC_DATA, + char buf[IPSEC_ADDRSTRLEN]; + printf("%s: mode %s proto %u dst %s\n", __func__, + kdebug_secasindex_mode(xh->mode), xh->proto, + ipsec_address(&xh->dst, buf, sizeof(buf)))); + if (xh->proto != sp->req[idx]->saidx.proto) + continue; + /* If SA had IPSEC_MODE_ANY, consider this as match. */ + if (xh->mode != sp->req[idx]->saidx.mode && + xh->mode != IPSEC_MODE_ANY) + continue; + /* + * For transport mode IPsec request doesn't contain + * addresses. We need to use address from spidx. 
+ */ + if (sp->req[idx]->saidx.mode == IPSEC_MODE_TRANSPORT) { + if (key_sockaddrcmp_withmask(&xh->dst.sa, + &sp->spidx.dst.sa, sp->spidx.prefd) != 0) + continue; + } else { + if (key_sockaddrcmp(&xh->dst.sa, + &sp->req[idx]->saidx.dst.sa, 0) != 0) + continue; + } + return (0); /* matched */ + } + return (1); +} + /* * Check security policy requirements against the actual * packet contents. Return one if the packet should be @@ -1208,14 +1009,17 @@ ipsec_get_reqlevel(struct ipsecrequest *isr) * 0: valid * 1: invalid */ -int -ipsec_in_reject(struct secpolicy *sp, struct mbuf *m) +static int +ipsec_in_reject(struct secpolicy *sp, struct inpcb *inp, const struct mbuf *m) { - struct ipsecrequest *isr; - int need_auth; + int i; + + KEYDBG(IPSEC_STAMP, + printf("%s: PCB(%p): using SP(%p)\n", __func__, inp, sp)); + KEYDBG(IPSEC_DATA, kdebug_secpolicy(sp)); - KEYDEBUG(KEYDEBUG_IPSEC_DATA, - printf("%s: using SP\n", __func__); kdebug_secpolicy(sp)); + if (inp != NULL && inp->inp_sp != NULL && inp->inp_sp->sp_in == NULL) + ipsec_cachepolicy(inp, sp, IPSEC_DIR_INBOUND); /* Check policy. */ switch (sp->policy) { @@ -1229,131 +1033,59 @@ ipsec_in_reject(struct secpolicy *sp, struct mbuf *m) IPSEC_ASSERT(sp->policy == IPSEC_POLICY_IPSEC, ("invalid policy %u", sp->policy)); - /* XXX Should compare policy against IPsec header history. */ - - need_auth = 0; - for (isr = sp->req; isr != NULL; isr = isr->next) { - if (ipsec_get_reqlevel(isr) != IPSEC_LEVEL_REQUIRE) + /* + * ipsec[46]_common_input_cb after each transform adds + * PACKET_TAG_IPSEC_IN_DONE mbuf tag. It contains SPI, proto, mode + * and destination address from saidx. We can compare info from + * these tags with requirements in SP. + */ + for (i = 0; i < sp->tcount; i++) { + /* + * Do not check IPcomp, since IPcomp document + * says that we shouldn't compress small packets. + * IPComp policy should always be treated as being + * in "use" level. 
+ */ + if (sp->req[i]->saidx.proto == IPPROTO_IPCOMP || + ipsec_get_reqlevel(sp, i) != IPSEC_LEVEL_REQUIRE) continue; - switch (isr->saidx.proto) { + if (V_check_policy_history != 0 && + ipsec_check_history(m, sp, i) != 0) + return (1); + else switch (sp->req[i]->saidx.proto) { case IPPROTO_ESP: if ((m->m_flags & M_DECRYPTED) == 0) { - KEYDEBUG(KEYDEBUG_IPSEC_DUMP, + KEYDBG(IPSEC_DUMP, printf("%s: ESP m_flags:%x\n", __func__, m->m_flags)); return (1); } - - if (!need_auth && - isr->sav != NULL && - isr->sav->tdb_authalgxform != NULL && - (m->m_flags & M_AUTHIPDGM) == 0) { - KEYDEBUG(KEYDEBUG_IPSEC_DUMP, - printf("%s: ESP/AH m_flags:%x\n", __func__, - m->m_flags)); - return (1); - } break; case IPPROTO_AH: - need_auth = 1; if ((m->m_flags & M_AUTHIPHDR) == 0) { - KEYDEBUG(KEYDEBUG_IPSEC_DUMP, + KEYDBG(IPSEC_DUMP, printf("%s: AH m_flags:%x\n", __func__, m->m_flags)); return (1); } break; - case IPPROTO_IPCOMP: - /* - * We don't really care, as IPcomp document - * says that we shouldn't compress small - * packets. IPComp policy should always be - * treated as being in "use" level. - */ - break; } } return (0); /* Valid. */ } -static int -ipsec46_in_reject(struct mbuf *m, struct inpcb *inp) -{ - struct secpolicy *sp; - int error; - int result; - - IPSEC_ASSERT(m != NULL, ("null mbuf")); - - /* - * Get SP for this packet. - * When we are called from ip_forward(), we call - * ipsec_getpolicybyaddr() with IP_FORWARDING flag. - */ - if (inp == NULL) - sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND, IP_FORWARDING, &error); - else - sp = ipsec_getpolicybysock(m, IPSEC_DIR_INBOUND, inp, &error); - - if (sp != NULL) { - result = ipsec_in_reject(sp, m); - KEY_FREESP(&sp); - } else { - result = 0; /* XXX Should be panic? - * -> No, there may be error. */ - } - return (result); -} - -/* - * Check AH/ESP integrity. - * This function is called from tcp_input(), udp_input(), - * and {ah,esp}4_input for tunnel mode. 
- */ -int -ipsec4_in_reject(struct mbuf *m, struct inpcb *inp) -{ - int result; - - result = ipsec46_in_reject(m, inp); - if (result) - IPSECSTAT_INC(ips_in_polvio); - - return (result); -} - -#ifdef INET6 -/* - * Check AH/ESP integrity. - * This function is called from tcp6_input(), udp6_input(), - * and {ah,esp}6_input for tunnel mode. - */ -int -ipsec6_in_reject(struct mbuf *m, struct inpcb *inp) -{ - int result; - - result = ipsec46_in_reject(m, inp); - if (result) - IPSEC6STAT_INC(ips_in_polvio); - - return (result); -} -#endif - /* * Compute the byte size to be occupied by IPsec header. * In case it is tunnelled, it includes the size of outer IP header. - * NOTE: SP passed is freed in this function. */ static size_t ipsec_hdrsiz_internal(struct secpolicy *sp) { - struct ipsecrequest *isr; size_t size; + int i; - KEYDEBUG(KEYDEBUG_IPSEC_DATA, - printf("%s: using SP\n", __func__); kdebug_secpolicy(sp)); + KEYDBG(IPSEC_STAMP, printf("%s: using SP(%p)\n", __func__, sp)); + KEYDBG(IPSEC_DATA, kdebug_secpolicy(sp)); switch (sp->policy) { case IPSEC_POLICY_DISCARD: @@ -1365,80 +1097,69 @@ ipsec_hdrsiz_internal(struct secpolicy *sp) IPSEC_ASSERT(sp->policy == IPSEC_POLICY_IPSEC, ("invalid policy %u", sp->policy)); + /* + * XXX: for each transform we need to lookup suitable SA + * and use info from SA to calculate headers size. + * XXX: for NAT-T we need to cosider UDP header size. 
+ */ size = 0; - for (isr = sp->req; isr != NULL; isr = isr->next) { - size_t clen = 0; - - switch (isr->saidx.proto) { + for (i = 0; i < sp->tcount; i++) { + switch (sp->req[i]->saidx.proto) { case IPPROTO_ESP: - clen = esp_hdrsiz(isr->sav); + size += esp_hdrsiz(NULL); break; case IPPROTO_AH: - clen = ah_hdrsiz(isr->sav); + size += ah_hdrsiz(NULL); break; case IPPROTO_IPCOMP: - clen = sizeof(struct ipcomp); + size += sizeof(struct ipcomp); break; } - if (isr->saidx.mode == IPSEC_MODE_TUNNEL) { - switch (isr->saidx.dst.sa.sa_family) { + if (sp->req[i]->saidx.mode == IPSEC_MODE_TUNNEL) { + switch (sp->req[i]->saidx.dst.sa.sa_family) { +#ifdef INET case AF_INET: - clen += sizeof(struct ip); + size += sizeof(struct ip); break; +#endif #ifdef INET6 case AF_INET6: - clen += sizeof(struct ip6_hdr); + size += sizeof(struct ip6_hdr); break; #endif default: ipseclog((LOG_ERR, "%s: unknown AF %d in " "IPsec tunnel SA\n", __func__, - ((struct sockaddr *)&isr->saidx.dst)->sa_family)); + sp->req[i]->saidx.dst.sa.sa_family)); break; } } - size += clen; } - return (size); } -/* - * This function is called from ipsec_hdrsiz_tcp(), ip_ipsec_mtu(), - * disabled ip6_ipsec_mtu() and ip6_forward(). +/* + * Compute ESP/AH header size for protocols with PCB, including + * outer IP header. Currently only tcp_output() uses it. */ size_t -ipsec_hdrsiz(struct mbuf *m, u_int dir, struct inpcb *inp) +ipsec_hdrsiz_inpcb(struct inpcb *inp) { + struct secpolicyindex spidx; struct secpolicy *sp; - int error; - size_t size; - - IPSEC_ASSERT(m != NULL, ("null mbuf")); - - /* Get SP for this packet. - * When we are called from ip_forward(), we call - * ipsec_getpolicybyaddr() with IP_FORWARDING flag. 
- */ - if (inp == NULL) - sp = ipsec_getpolicybyaddr(m, dir, IP_FORWARDING, &error); - else - sp = ipsec_getpolicybysock(m, dir, inp, &error); + size_t sz; - if (sp != NULL) { - size = ipsec_hdrsiz_internal(sp); - KEYDEBUG(KEYDEBUG_IPSEC_DATA, - printf("%s: size:%lu.\n", __func__, - (unsigned long)size)); - - KEY_FREESP(&sp); - } else { - size = 0; /* XXX Should be panic? - * -> No, we are called w/o knowing if - * IPsec processing is needed. */ + sp = ipsec_getpcbpolicy(inp, IPSEC_DIR_OUTBOUND); + if (sp == NULL && key_havesp(IPSEC_DIR_OUTBOUND)) { + ipsec_setspidx_inpcb(inp, &spidx, IPSEC_DIR_OUTBOUND); + sp = key_allocsp(&spidx, IPSEC_DIR_OUTBOUND); } - return (size); + if (sp == NULL) + sp = key_allocsp_default(); + sz = ipsec_hdrsiz_internal(sp); + key_freesp(&sp); + return (sz); } /* @@ -1449,27 +1170,31 @@ ipsec_hdrsiz(struct mbuf *m, u_int dir, struct inpcb *inp) * beforehand). * 0 (zero) is returned if packet disallowed, 1 if packet permitted. * - * Based on RFC 2401. + * Based on RFC 6479. Blocks are 32 bits unsigned integers */ + +#define IPSEC_BITMAP_INDEX_MASK(w) (w - 1) +#define IPSEC_REDUNDANT_BIT_SHIFTS 5 +#define IPSEC_REDUNDANT_BITS (1 << IPSEC_REDUNDANT_BIT_SHIFTS) +#define IPSEC_BITMAP_LOC_MASK (IPSEC_REDUNDANT_BITS - 1) + int -ipsec_chkreplay(u_int32_t seq, struct secasvar *sav) +ipsec_chkreplay(uint32_t seq, struct secasvar *sav) { const struct secreplay *replay; - u_int32_t diff; - int fr; - u_int32_t wsizeb; /* Constant: bits of window size. */ - int frlast; /* Constant: last frame. */ + uint32_t wsizeb; /* Constant: window size. */ + int index, bit_location; IPSEC_ASSERT(sav != NULL, ("Null SA")); IPSEC_ASSERT(sav->replay != NULL, ("Null replay state")); replay = sav->replay; + /* No need to check replay if disabled. */ if (replay->wsize == 0) - return (1); /* No need to check replay. */ + return (1); /* Constant. */ - frlast = replay->wsize - 1; wsizeb = replay->wsize << 3; /* Sequence number of 0 is invalid. 
*/ @@ -1480,26 +1205,26 @@ ipsec_chkreplay(u_int32_t seq, struct secasvar *sav) if (replay->count == 0) return (1); - if (seq > replay->lastseq) { - /* Larger sequences are okay. */ + /* Larger sequences are okay. */ + if (seq > replay->lastseq) return (1); - } else { - /* seq is equal or less than lastseq. */ - diff = replay->lastseq - seq; - /* Over range to check, i.e. too old or wrapped. */ - if (diff >= wsizeb) - return (0); - - fr = frlast - diff / 8; + /* Over range to check, i.e. too old or wrapped. */ + if (replay->lastseq - seq >= wsizeb) + return (0); - /* This packet already seen? */ - if ((replay->bitmap)[fr] & (1 << (diff % 8))) - return (0); + /* The sequence is inside the sliding window + * now check the bit in the bitmap + * bit location only depends on the sequence number + */ + bit_location = seq & IPSEC_BITMAP_LOC_MASK; + index = (seq >> IPSEC_REDUNDANT_BIT_SHIFTS) + & IPSEC_BITMAP_INDEX_MASK(replay->bitmap_size); - /* Out of order but good. */ - return (1); - } + /* This packet already seen? */ + if ((replay->bitmap)[index] & (1 << bit_location)) + return (0); + return (1); } /* @@ -1508,13 +1233,12 @@ ipsec_chkreplay(u_int32_t seq, struct secasvar *sav) * 1: NG */ int -ipsec_updatereplay(u_int32_t seq, struct secasvar *sav) +ipsec_updatereplay(uint32_t seq, struct secasvar *sav) { + char buf[128]; struct secreplay *replay; - u_int32_t diff; - int fr; - u_int32_t wsizeb; /* Constant: bits of window size. */ - int frlast; /* Constant: last frame. */ + uint32_t wsizeb; /* Constant: window size. */ + int diff, index, bit_location; IPSEC_ASSERT(sav != NULL, ("Null SA")); IPSEC_ASSERT(sav->replay != NULL, ("Null replay state")); @@ -1525,58 +1249,46 @@ ipsec_updatereplay(u_int32_t seq, struct secasvar *sav) goto ok; /* No need to check replay. */ /* Constant. */ - frlast = replay->wsize - 1; wsizeb = replay->wsize << 3; /* Sequence number of 0 is invalid. */ if (seq == 0) return (1); - /* First time. 
*/ - if (replay->count == 0) { - replay->lastseq = seq; - bzero(replay->bitmap, replay->wsize); - (replay->bitmap)[frlast] = 1; + /* The packet is too old, no need to update */ + if (wsizeb + seq < replay->lastseq) goto ok; - } + /* Now update the bit */ + index = (seq >> IPSEC_REDUNDANT_BIT_SHIFTS); + + /* First check if the sequence number is in the range */ if (seq > replay->lastseq) { - /* seq is larger than lastseq. */ - diff = seq - replay->lastseq; - - /* New larger sequence number. */ - if (diff < wsizeb) { - /* In window. */ - /* Set bit for this packet. */ - vshiftl(replay->bitmap, diff, replay->wsize); - (replay->bitmap)[frlast] |= 1; - } else { - /* This packet has a "way larger". */ - bzero(replay->bitmap, replay->wsize); - (replay->bitmap)[frlast] = 1; - } - replay->lastseq = seq; + int id; + int index_cur = replay->lastseq >> IPSEC_REDUNDANT_BIT_SHIFTS; - /* Larger is good. */ - } else { - /* seq is equal or less than lastseq. */ - diff = replay->lastseq - seq; + diff = index - index_cur; + if (diff > replay->bitmap_size) { + /* something unusual in this case */ + diff = replay->bitmap_size; + } - /* Over range to check, i.e. too old or wrapped. */ - if (diff >= wsizeb) - return (1); + for (id = 0; id < diff; ++id) { + replay->bitmap[(id + index_cur + 1) + & IPSEC_BITMAP_INDEX_MASK(replay->bitmap_size)] = 0; + } - fr = frlast - diff / 8; + replay->lastseq = seq; + } - /* This packet already seen? */ - if ((replay->bitmap)[fr] & (1 << (diff % 8))) - return (1); + index &= IPSEC_BITMAP_INDEX_MASK(replay->bitmap_size); + bit_location = seq & IPSEC_BITMAP_LOC_MASK; - /* Mark as seen. */ - (replay->bitmap)[fr] |= (1 << (diff % 8)); + /* this packet has already been received */ + if (replay->bitmap[index] & (1 << bit_location)) + return (1); - /* Out of order but good. */ - } + replay->bitmap[index] |= (1 << bit_location); ok: if (replay->count == ~0) { @@ -1585,167 +1297,99 @@ ok: replay->overflow++; /* Don't increment, no more packets accepted. 
*/ - if ((sav->flags & SADB_X_EXT_CYCSEQ) == 0) + if ((sav->flags & SADB_X_EXT_CYCSEQ) == 0) { + if (sav->sah->saidx.proto == IPPROTO_AH) + AHSTAT_INC(ahs_wrap); + else if (sav->sah->saidx.proto == IPPROTO_ESP) + ESPSTAT_INC(esps_wrap); return (1); + } ipseclog((LOG_WARNING, "%s: replay counter made %d cycle. %s\n", - __func__, replay->overflow, ipsec_logsastr(sav))); + __func__, replay->overflow, + ipsec_sa2str(sav, buf, sizeof(buf)))); } - - replay->count++; - return (0); } -/* - * Shift variable length buffer to left. - * IN: bitmap: pointer to the buffer - * nbit: the number of to shift. - * wsize: buffer size (bytes). - */ -static void -vshiftl(unsigned char *bitmap, int nbit, int wsize) -{ - int s, j, i; - unsigned char over; - - for (j = 0; j < nbit; j += 8) { - s = (nbit - j < 8) ? (nbit - j): 8; - bitmap[0] <<= s; - for (i = 1; i < wsize; i++) { - over = (bitmap[i] >> (8 - s)); - bitmap[i] <<= s; - bitmap[i-1] |= over; - } - } -} - -#ifdef INET -/* Return a printable string for the IPv4 address. */ -static char * -inet_ntoa4(struct in_addr ina) -{ - static char buf[4][4 * sizeof "123" + 4]; - unsigned char *ucp = (unsigned char *) &ina; - static int i = 3; - - /* XXX-BZ Returns static buffer. */ - i = (i + 1) % 4; - sprintf(buf[i], "%d.%d.%d.%d", ucp[0] & 0xff, ucp[1] & 0xff, - ucp[2] & 0xff, ucp[3] & 0xff); - return (buf[i]); -} -#endif - -/* Return a printable string for the address. */ -char * -ipsec_address(union sockaddr_union* sa) +int +ipsec_updateid(struct secasvar *sav, uint64_t *new, uint64_t *old) { -#ifdef INET6 - char ip6buf[INET6_ADDRSTRLEN]; -#endif + uint64_t tmp; - switch (sa->sa.sa_family) { -#ifdef INET - case AF_INET: - return (inet_ntoa4(sa->sin.sin_addr)); -#endif /* INET */ -#ifdef INET6 - case AF_INET6: - return (ip6_sprintf(ip6buf, &sa->sin6.sin6_addr)); -#endif /* INET6 */ - default: - return ("(unknown address family)"); + /* + * tdb_cryptoid is initialized by xform_init(). 
+ * Then it can be changed only when some crypto error occurred or + * when SA is deleted. We stored used cryptoid in the xform_data + * structure. In case when crypto error occurred and crypto + * subsystem has reinited the session, it returns new cryptoid + * and EAGAIN error code. + * + * This function will be called when we got EAGAIN from crypto + * subsystem. + * *new is cryptoid that was returned by crypto subsystem in + * the crp_sid. + * *old is the original cryptoid that we stored in xform_data. + * + * For first failed request *old == sav->tdb_cryptoid, then + * we update sav->tdb_cryptoid and redo crypto_dispatch(). + * For next failed request *old != sav->tdb_cryptoid, then + * we store cryptoid from first request into the *new variable + * and crp_sid from this second session will be returned via + * *old pointer, so caller can release second session. + * + * XXXAE: check this more carefully. + */ + KEYDBG(IPSEC_STAMP, + printf("%s: SA(%p) moves cryptoid %jd -> %jd\n", + __func__, sav, (uintmax_t)(*old), (uintmax_t)(*new))); + KEYDBG(IPSEC_DATA, kdebug_secasv(sav)); + SECASVAR_LOCK(sav); + if (sav->tdb_cryptoid != *old) { + /* cryptoid was already updated */ + tmp = *new; + *new = sav->tdb_cryptoid; + *old = tmp; + SECASVAR_UNLOCK(sav); + return (1); } + sav->tdb_cryptoid = *new; + SECASVAR_UNLOCK(sav); + return (0); } -const char * -ipsec_logsastr(struct secasvar *sav) +int +ipsec_initialized(void) { - static char buf[256]; - char *p; - struct secasindex *saidx = &sav->sah->saidx; - - IPSEC_ASSERT(saidx->src.sa.sa_family == saidx->dst.sa.sa_family, - ("address family mismatch")); - - p = buf; - snprintf(buf, sizeof(buf), "SA(SPI=%u ", (u_int32_t)ntohl(sav->spi)); - while (p && *p) - p++; - /* NB: only use ipsec_address on one address at a time. 
*/ - snprintf(p, sizeof (buf) - (p - buf), "src=%s ", - ipsec_address(&saidx->src)); - while (p && *p) - p++; - snprintf(p, sizeof (buf) - (p - buf), "dst=%s)", - ipsec_address(&saidx->dst)); - - return (buf); -} -void -ipsec_dumpmbuf(struct mbuf *m) -{ - int totlen; - int i; - u_char *p; - - totlen = 0; - printf("---\n"); - while (m) { - p = mtod(m, u_char *); - for (i = 0; i < m->m_len; i++) { - printf("%02x ", p[i]); - totlen++; - if (totlen % 16 == 0) - printf("\n"); - } - m = m->m_next; - } - if (totlen % 16 != 0) - printf("\n"); - printf("---\n"); + return (V_def_policy != NULL); } static void -ipsec_init(const void *unused __unused) +def_policy_init(const void *unused __unused) { - SECPOLICY_LOCK_INIT(&V_ip4_def_policy); - V_ip4_def_policy.refcnt = 1; /* NB: disallow free. */ + V_def_policy = key_newsp(); + if (V_def_policy != NULL) { + V_def_policy->policy = IPSEC_POLICY_NONE; + /* Force INPCB SP cache invalidation */ + key_bumpspgen(); + } else + printf("%s: failed to initialize default policy\n", __func__); } -VNET_SYSINIT(ipsec_init, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY, ipsec_init, - NULL); - - -/* XXX This stuff doesn't belong here... */ -static struct xformsw* xforms = NULL; -/* - * Register a transform; typically at system startup. - */ -void -xform_register(struct xformsw* xsp) +static void +def_policy_uninit(const void *unused __unused) { - xsp->xf_next = xforms; - xforms = xsp; + if (V_def_policy != NULL) { + key_freesp(&V_def_policy); + key_bumpspgen(); + } } -/* - * Initialize transform support in an sav. - */ -int -xform_init(struct secasvar *sav, int xftype) -{ - struct xformsw *xsp; - - if (sav->tdb_xform != NULL) /* Previously initialized. 
*/ - return (0); - for (xsp = xforms; xsp; xsp = xsp->xf_next) - if (xsp->xf_type == xftype) - return ((*xsp->xf_init)(sav, xsp)); - return (EINVAL); -} +VNET_SYSINIT(def_policy_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_FIRST, + def_policy_init, NULL); +VNET_SYSUNINIT(def_policy_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_FIRST, + def_policy_uninit, NULL); diff --git a/freebsd/sys/netipsec/ipsec.h b/freebsd/sys/netipsec/ipsec.h index f3415872..7653e4d4 100644 --- a/freebsd/sys/netipsec/ipsec.h +++ b/freebsd/sys/netipsec/ipsec.h @@ -47,12 +47,11 @@ #ifdef _KERNEL -#define IPSEC_ASSERT(_c,_m) KASSERT(_c, _m) +#include +#include +#include -#define IPSEC_IS_PRIVILEGED_SO(_so) \ - ((_so)->so_cred != NULL && \ - priv_check_cred((_so)->so_cred, PRIV_NETINET_IPSEC, 0) \ - == 0) +#define IPSEC_ASSERT(_c,_m) KASSERT(_c, _m) /* * Security Policy Index @@ -61,37 +60,41 @@ * specifies ICMPv6 type, and the port field in "dst" specifies ICMPv6 code. */ struct secpolicyindex { - u_int8_t dir; /* direction of packet flow, see below */ union sockaddr_union src; /* IP src address for SP */ union sockaddr_union dst; /* IP dst address for SP */ - u_int8_t prefs; /* prefix length in bits for src */ - u_int8_t prefd; /* prefix length in bits for dst */ - u_int16_t ul_proto; /* upper layer Protocol */ -#ifdef notyet - uid_t uids; - uid_t uidd; - gid_t gids; - gid_t gidd; -#endif + uint8_t ul_proto; /* upper layer Protocol */ + uint8_t dir; /* direction of packet flow */ + uint8_t prefs; /* prefix length in bits for src */ + uint8_t prefd; /* prefix length in bits for dst */ +}; + +/* Request for IPsec */ +struct ipsecrequest { + struct secasindex saidx;/* hint for search proper SA */ + /* if __ss_len == 0 then no address specified.*/ + u_int level; /* IPsec level defined below. 
*/ }; /* Security Policy Data Base */ struct secpolicy { - LIST_ENTRY(secpolicy) chain; - struct mtx lock; + TAILQ_ENTRY(secpolicy) chain; + LIST_ENTRY(secpolicy) idhash; + LIST_ENTRY(secpolicy) drainq; - u_int refcnt; /* reference count */ struct secpolicyindex spidx; /* selector */ - u_int32_t id; /* It's unique number on the system. */ - u_int state; /* 0: dead, others: alive */ -#define IPSEC_SPSTATE_DEAD 0 -#define IPSEC_SPSTATE_ALIVE 1 - u_int16_t policy; /* policy_type per pfkeyv2.h */ - u_int16_t scangen; /* scan generation # */ - struct ipsecrequest *req; - /* pointer to the ipsec request tree, */ - /* if policy == IPSEC else this value == NULL.*/ - +#define IPSEC_MAXREQ 4 + struct ipsecrequest *req[IPSEC_MAXREQ]; + u_int tcount; /* IPsec transforms count */ + volatile u_int refcnt; /* reference count */ + u_int policy; /* policy_type per pfkeyv2.h */ + u_int state; +#define IPSEC_SPSTATE_DEAD 0 +#define IPSEC_SPSTATE_LARVAL 1 +#define IPSEC_SPSTATE_ALIVE 2 +#define IPSEC_SPSTATE_PCB 3 +#define IPSEC_SPSTATE_IFNET 4 + uint32_t priority; /* priority of this policy */ + uint32_t id; /* It's unique number on the system. */ /* * lifetime handler. * the policy can be used without limitiation if both lifetime and @@ -105,48 +108,25 @@ struct secpolicy { long validtime; /* duration this policy is valid without use */ }; -#define SECPOLICY_LOCK_INIT(_sp) \ - mtx_init(&(_sp)->lock, "ipsec policy", NULL, MTX_DEF) -#define SECPOLICY_LOCK(_sp) mtx_lock(&(_sp)->lock) -#define SECPOLICY_UNLOCK(_sp) mtx_unlock(&(_sp)->lock) -#define SECPOLICY_LOCK_DESTROY(_sp) mtx_destroy(&(_sp)->lock) -#define SECPOLICY_LOCK_ASSERT(_sp) mtx_assert(&(_sp)->lock, MA_OWNED) - -/* Request for IPsec */ -struct ipsecrequest { - struct ipsecrequest *next; - /* pointer to next structure */ - /* If NULL, it means the end of chain. */ - struct secasindex saidx;/* hint for search proper SA */ - /* if __ss_len == 0 then no address specified.*/ - u_int level; /* IPsec level defined below. 
*/ - - struct secasvar *sav; /* place holder of SA for use */ - struct secpolicy *sp; /* back pointer to SP */ - struct rwlock lock; /* to interlock updates */ -}; - /* - * Need recursion for when crypto callbacks happen directly, - * as in the case of software crypto. Need to look at how - * hard it is to remove this... + * PCB security policies. + * Application can setup private security policies for socket. + * Such policies can have IPSEC, BYPASS and ENTRUST type. + * By default, policies are set to NULL. This means that they have ENTRUST type. + * When application sets BYPASS or IPSEC type policy, the flags field + * is also updated. When flags is not set, the system could store + * used security policy into the sp_in/sp_out pointer to speed up further + * lookups. */ -#define IPSECREQUEST_LOCK_INIT(_isr) \ - rw_init_flags(&(_isr)->lock, "ipsec request", RW_RECURSE) -#define IPSECREQUEST_LOCK(_isr) rw_rlock(&(_isr)->lock) -#define IPSECREQUEST_UNLOCK(_isr) rw_runlock(&(_isr)->lock) -#define IPSECREQUEST_WLOCK(_isr) rw_wlock(&(_isr)->lock) -#define IPSECREQUEST_WUNLOCK(_isr) rw_wunlock(&(_isr)->lock) -#define IPSECREQUEST_UPGRADE(_isr) rw_try_upgrade(&(_isr)->lock) -#define IPSECREQUEST_DOWNGRADE(_isr) rw_downgrade(&(_isr)->lock) -#define IPSECREQUEST_LOCK_DESTROY(_isr) rw_destroy(&(_isr)->lock) -#define IPSECREQUEST_LOCK_ASSERT(_isr) rw_assert(&(_isr)->lock, RA_LOCKED) - -/* security policy in PCB */ struct inpcbpolicy { - struct secpolicy *sp_in; - struct secpolicy *sp_out; - int priv; /* privileged socket ? */ + struct secpolicy *sp_in; + struct secpolicy *sp_out; + + uint32_t genid; + uint16_t flags; +#define INP_INBOUND_POLICY 0x0001 +#define INP_OUTBOUND_POLICY 0x0002 + uint16_t hdrsz; }; /* SP acquiring list table. 
*/ @@ -161,6 +141,9 @@ struct secspacq { }; #endif /* _KERNEL */ +/* buffer size for formatted output of ipsec address */ +#define IPSEC_ADDRSTRLEN (INET6_ADDRSTRLEN + 11) + /* according to IANA assignment, port 0x0000 and proto 0xff are reserved. */ #define IPSEC_PORT_ANY 0 #define IPSEC_ULPROTO_ANY 255 @@ -196,6 +179,12 @@ struct secspacq { #define IPSEC_POLICY_ENTRUST 3 /* consulting SPD if present. */ #define IPSEC_POLICY_BYPASS 4 /* only for privileged socket. */ +/* Policy scope */ +#define IPSEC_POLICYSCOPE_ANY 0x00 /* unspecified */ +#define IPSEC_POLICYSCOPE_GLOBAL 0x01 /* global scope */ +#define IPSEC_POLICYSCOPE_IFNET 0x02 /* if_ipsec(4) scope */ +#define IPSEC_POLICYSCOPE_PCB 0x04 /* PCB scope */ + /* Security protocol level */ #define IPSEC_LEVEL_DEFAULT 0 /* reference to system default */ #define IPSEC_LEVEL_USE 1 /* use SA if present. */ @@ -217,62 +206,33 @@ struct secspacq { /* statistics for ipsec processing */ struct ipsecstat { - u_quad_t in_success; /* succeeded inbound process */ - u_quad_t in_polvio; - /* security policy violation for inbound process */ - u_quad_t in_nosa; /* inbound SA is unavailable */ - u_quad_t in_inval; /* inbound processing failed due to EINVAL */ - u_quad_t in_nomem; /* inbound processing failed due to ENOBUFS */ - u_quad_t in_badspi; /* failed getting a SPI */ - u_quad_t in_ahreplay; /* AH replay check failed */ - u_quad_t in_espreplay; /* ESP replay check failed */ - u_quad_t in_ahauthsucc; /* AH authentication success */ - u_quad_t in_ahauthfail; /* AH authentication failure */ - u_quad_t in_espauthsucc; /* ESP authentication success */ - u_quad_t in_espauthfail; /* ESP authentication failure */ - u_quad_t in_esphist[256]; - u_quad_t in_ahhist[256]; - u_quad_t in_comphist[256]; - u_quad_t out_success; /* succeeded outbound process */ - u_quad_t out_polvio; - /* security policy violation for outbound process */ - u_quad_t out_nosa; /* outbound SA is unavailable */ - u_quad_t out_inval; /* outbound process failed due 
to EINVAL */ - u_quad_t out_nomem; /* inbound processing failed due to ENOBUFS */ - u_quad_t out_noroute; /* there is no route */ - u_quad_t out_esphist[256]; - u_quad_t out_ahhist[256]; - u_quad_t out_comphist[256]; - - u_quad_t spdcachelookup; - u_quad_t spdcachemiss; - - u_int32_t ips_in_polvio; /* input: sec policy violation */ - u_int32_t ips_out_polvio; /* output: sec policy violation */ - u_int32_t ips_out_nosa; /* output: SA unavailable */ - u_int32_t ips_out_nomem; /* output: no memory available */ - u_int32_t ips_out_noroute; /* output: no route available */ - u_int32_t ips_out_inval; /* output: generic error */ - u_int32_t ips_out_bundlesa; /* output: bundled SA processed */ - u_int32_t ips_mbcoalesced; /* mbufs coalesced during clone */ - u_int32_t ips_clcoalesced; /* clusters coalesced during clone */ - u_int32_t ips_clcopied; /* clusters copied during clone */ - u_int32_t ips_mbinserted; /* mbufs inserted during makespace */ + uint64_t ips_in_polvio; /* input: sec policy violation */ + uint64_t ips_in_nomem; /* input: no memory available */ + uint64_t ips_in_inval; /* input: generic error */ + + uint64_t ips_out_polvio; /* output: sec policy violation */ + uint64_t ips_out_nosa; /* output: SA unavailable */ + uint64_t ips_out_nomem; /* output: no memory available */ + uint64_t ips_out_noroute; /* output: no route available */ + uint64_t ips_out_inval; /* output: generic error */ + uint64_t ips_out_bundlesa; /* output: bundled SA processed */ + + uint64_t ips_mbcoalesced; /* mbufs coalesced during clone */ + uint64_t ips_clcoalesced; /* clusters coalesced during clone */ + uint64_t ips_clcopied; /* clusters copied during clone */ + uint64_t ips_mbinserted; /* mbufs inserted during makespace */ /* * Temporary statistics for performance analysis. 
*/ /* See where ESP/AH/IPCOMP header land in mbuf on input */ - u_int32_t ips_input_front; - u_int32_t ips_input_middle; - u_int32_t ips_input_end; + uint64_t ips_input_front; + uint64_t ips_input_middle; + uint64_t ips_input_end; }; /* * Definitions for IPsec & Key sysctl operations. */ -/* - * Names for IPsec & Key sysctl objects - */ #define IPSECCTL_STATS 1 /* stats */ #define IPSECCTL_DEF_POLICY 2 #define IPSECCTL_DEF_ESP_TRANSLEV 3 /* int; ESP transport mode */ @@ -288,53 +248,18 @@ struct ipsecstat { #define IPSECCTL_ECN 11 #define IPSECCTL_DEBUG 12 #define IPSECCTL_ESP_RANDPAD 13 -#define IPSECCTL_MAXID 14 - -#define IPSECCTL_NAMES { \ - { 0, 0 }, \ - { 0, 0 }, \ - { "def_policy", CTLTYPE_INT }, \ - { "esp_trans_deflev", CTLTYPE_INT }, \ - { "esp_net_deflev", CTLTYPE_INT }, \ - { "ah_trans_deflev", CTLTYPE_INT }, \ - { "ah_net_deflev", CTLTYPE_INT }, \ - { 0, 0 }, \ - { "ah_cleartos", CTLTYPE_INT }, \ - { "ah_offsetmask", CTLTYPE_INT }, \ - { "dfbit", CTLTYPE_INT }, \ - { "ecn", CTLTYPE_INT }, \ - { "debug", CTLTYPE_INT }, \ - { "esp_randpad", CTLTYPE_INT }, \ -} - -#define IPSEC6CTL_NAMES { \ - { 0, 0 }, \ - { 0, 0 }, \ - { "def_policy", CTLTYPE_INT }, \ - { "esp_trans_deflev", CTLTYPE_INT }, \ - { "esp_net_deflev", CTLTYPE_INT }, \ - { "ah_trans_deflev", CTLTYPE_INT }, \ - { "ah_net_deflev", CTLTYPE_INT }, \ - { 0, 0 }, \ - { 0, 0 }, \ - { 0, 0 }, \ - { 0, 0 }, \ - { "ecn", CTLTYPE_INT }, \ - { "debug", CTLTYPE_INT }, \ - { "esp_randpad", CTLTYPE_INT }, \ -} #ifdef _KERNEL -struct ipsec_output_state { - struct mbuf *m; - struct route *ro; - struct sockaddr *dst; -}; +#include -struct ipsec_history { - int ih_proto; - u_int32_t ih_spi; -}; +struct ipsec_ctx_data; +#define IPSEC_INIT_CTX(_ctx, _mp, _sav, _af, _enc) do { \ + (_ctx)->mp = (_mp); \ + (_ctx)->sav = (_sav); \ + (_ctx)->af = (_af); \ + (_ctx)->enc = (_enc); \ +} while(0) +int ipsec_run_hhooks(struct ipsec_ctx_data *ctx, int direction); VNET_DECLARE(int, ipsec_debug); #define V_ipsec_debug 
VNET(ipsec_debug) @@ -347,8 +272,7 @@ VNET_DECLARE(int, ipsec_integrity); #define V_ipsec_integrity VNET(ipsec_integrity) #endif -VNET_DECLARE(struct ipsecstat, ipsec4stat); -VNET_DECLARE(struct secpolicy, ip4_def_policy); +VNET_PCPUSTAT_DECLARE(struct ipsecstat, ipsec4stat); VNET_DECLARE(int, ip4_esp_trans_deflev); VNET_DECLARE(int, ip4_esp_net_deflev); VNET_DECLARE(int, ip4_ah_trans_deflev); @@ -358,10 +282,10 @@ VNET_DECLARE(int, ip4_ipsec_dfbit); VNET_DECLARE(int, ip4_ipsec_ecn); VNET_DECLARE(int, ip4_esp_randpad); VNET_DECLARE(int, crypto_support); +VNET_DECLARE(int, natt_cksum_policy); -#define IPSECSTAT_INC(name) V_ipsec4stat.name += 1 -#define V_ipsec4stat VNET(ipsec4stat) -#define V_ip4_def_policy VNET(ip4_def_policy) +#define IPSECSTAT_INC(name) \ + VNET_PCPUSTAT_ADD(struct ipsecstat, ipsec4stat, name, 1) #define V_ip4_esp_trans_deflev VNET(ip4_esp_trans_deflev) #define V_ip4_esp_net_deflev VNET(ip4_esp_net_deflev) #define V_ip4_ah_trans_deflev VNET(ip4_ah_trans_deflev) @@ -371,64 +295,52 @@ VNET_DECLARE(int, crypto_support); #define V_ip4_ipsec_ecn VNET(ip4_ipsec_ecn) #define V_ip4_esp_randpad VNET(ip4_esp_randpad) #define V_crypto_support VNET(crypto_support) +#define V_natt_cksum_policy VNET(natt_cksum_policy) #define ipseclog(x) do { if (V_ipsec_debug) log x; } while (0) /* for openbsd compatibility */ #define DPRINTF(x) do { if (V_ipsec_debug) printf x; } while (0) -extern struct ipsecrequest *ipsec_newisr(void); -extern void ipsec_delisr(struct ipsecrequest *); - -struct tdb_ident; -extern struct secpolicy *ipsec_getpolicy __P((struct tdb_ident*, u_int)); struct inpcb; -extern struct secpolicy *ipsec4_checkpolicy __P((struct mbuf *, u_int, u_int, - int *, struct inpcb *)); -extern struct secpolicy * ipsec_getpolicybyaddr(struct mbuf *, u_int, - int, int *); +struct m_tag; +struct secasvar; +struct sockopt; +struct tcphdr; +union sockaddr_union; -struct inpcb; -extern int ipsec_init_policy __P((struct socket *so, struct inpcbpolicy **)); -extern int 
ipsec_copy_policy - __P((struct inpcbpolicy *, struct inpcbpolicy *)); -extern u_int ipsec_get_reqlevel __P((struct ipsecrequest *)); -extern int ipsec_in_reject __P((struct secpolicy *, struct mbuf *)); - -extern int ipsec_set_policy __P((struct inpcb *inp, int optname, - caddr_t request, size_t len, struct ucred *cred)); -extern int ipsec_get_policy __P((struct inpcb *inpcb, caddr_t request, - size_t len, struct mbuf **mp)); -extern int ipsec_delete_pcbpolicy __P((struct inpcb *)); -extern int ipsec4_in_reject __P((struct mbuf *, struct inpcb *)); - -struct secas; -struct tcpcb; -extern int ipsec_chkreplay __P((u_int32_t, struct secasvar *)); -extern int ipsec_updatereplay __P((u_int32_t, struct secasvar *)); - -extern size_t ipsec_hdrsiz __P((struct mbuf *, u_int, struct inpcb *)); -extern size_t ipsec_hdrsiz_tcp __P((struct tcpcb *)); +int ipsec_if_input(struct mbuf *, struct secasvar *, uint32_t); -union sockaddr_union; -extern char * ipsec_address(union sockaddr_union* sa); -extern const char *ipsec_logsastr __P((struct secasvar *)); +struct ipsecrequest *ipsec_newisr(void); +void ipsec_delisr(struct ipsecrequest *); +struct secpolicy *ipsec4_checkpolicy(const struct mbuf *, struct inpcb *, + int *); -extern void ipsec_dumpmbuf __P((struct mbuf *)); +u_int ipsec_get_reqlevel(struct secpolicy *, u_int); -struct m_tag; -extern void ah4_input(struct mbuf *m, int off); -extern void ah4_ctlinput(int cmd, struct sockaddr *sa, void *); -extern void esp4_input(struct mbuf *m, int off); -extern void esp4_ctlinput(int cmd, struct sockaddr *sa, void *); -extern void ipcomp4_input(struct mbuf *m, int off); -extern int ipsec4_common_input(struct mbuf *m, ...); -extern int ipsec4_common_input_cb(struct mbuf *m, struct secasvar *sav, - int skip, int protoff, struct m_tag *mt); -extern int ipsec4_process_packet __P((struct mbuf *, struct ipsecrequest *, - int, int)); -extern int ipsec_process_done __P((struct mbuf *, struct ipsecrequest *)); - -extern struct mbuf 
*ipsec_copypkt __P((struct mbuf *)); +void udp_ipsec_adjust_cksum(struct mbuf *, struct secasvar *, int, int); +int udp_ipsec_output(struct mbuf *, struct secasvar *); +int udp_ipsec_input(struct mbuf *, int, int); +int udp_ipsec_pcbctl(struct inpcb *, struct sockopt *); + +int ipsec_chkreplay(uint32_t, struct secasvar *); +int ipsec_updatereplay(uint32_t, struct secasvar *); +int ipsec_updateid(struct secasvar *, uint64_t *, uint64_t *); +int ipsec_initialized(void); + +void ipsec_setspidx_inpcb(struct inpcb *, struct secpolicyindex *, u_int); + +void ipsec4_setsockaddrs(const struct mbuf *, union sockaddr_union *, + union sockaddr_union *); +int ipsec4_in_reject(const struct mbuf *, struct inpcb *); +int ipsec4_input(struct mbuf *, int, int); +int ipsec4_forward(struct mbuf *); +int ipsec4_pcbctl(struct inpcb *, struct sockopt *); +int ipsec4_output(struct mbuf *, struct inpcb *); +int ipsec4_capability(struct mbuf *, u_int); +int ipsec4_common_input_cb(struct mbuf *, struct secasvar *, int, int); +int ipsec4_process_packet(struct mbuf *, struct secpolicy *, struct inpcb *); +int ipsec_process_done(struct mbuf *, struct secpolicy *, struct secasvar *, + u_int); extern void m_checkalignment(const char* where, struct mbuf *m0, int off, int len); @@ -436,22 +348,13 @@ extern struct mbuf *m_makespace(struct mbuf *m0, int skip, int hlen, int *off); extern caddr_t m_pad(struct mbuf *m, int n); extern int m_striphdr(struct mbuf *m, int skip, int hlen); -#ifdef DEV_ENC -#define ENC_BEFORE 0x0001 -#define ENC_AFTER 0x0002 -#define ENC_IN 0x0100 -#define ENC_OUT 0x0200 -extern int ipsec_filter(struct mbuf **, int, int); -extern void ipsec_bpf(struct mbuf *, struct secasvar *, int, int); -#endif #endif /* _KERNEL */ #ifndef _KERNEL -extern caddr_t ipsec_set_policy __P((char *, int)); -extern int ipsec_get_policylen __P((caddr_t)); -extern char *ipsec_dump_policy __P((caddr_t, char *)); - -extern const char *ipsec_strerror __P((void)); +extern caddr_t ipsec_set_policy(char 
*, int); +extern int ipsec_get_policylen(caddr_t); +extern char *ipsec_dump_policy(caddr_t, char *); +extern const char *ipsec_strerror(void); #endif /* ! KERNEL */ diff --git a/freebsd/sys/netipsec/ipsec6.h b/freebsd/sys/netipsec/ipsec6.h index 21ec6b36..a5fae4d1 100644 --- a/freebsd/sys/netipsec/ipsec6.h +++ b/freebsd/sys/netipsec/ipsec6.h @@ -41,15 +41,17 @@ #include #ifdef _KERNEL -VNET_DECLARE(struct ipsecstat, ipsec6stat); +#include + +VNET_PCPUSTAT_DECLARE(struct ipsecstat, ipsec6stat); VNET_DECLARE(int, ip6_esp_trans_deflev); VNET_DECLARE(int, ip6_esp_net_deflev); VNET_DECLARE(int, ip6_ah_trans_deflev); VNET_DECLARE(int, ip6_ah_net_deflev); VNET_DECLARE(int, ip6_ipsec_ecn); -#define IPSEC6STAT_INC(name) V_ipsec6stat.name += 1 -#define V_ipsec6stat VNET(ipsec6stat) +#define IPSEC6STAT_INC(name) \ + VNET_PCPUSTAT_ADD(struct ipsecstat, ipsec6stat, name, 1) #define V_ip6_esp_trans_deflev VNET(ip6_esp_trans_deflev) #define V_ip6_esp_net_deflev VNET(ip6_esp_net_deflev) #define V_ip6_ah_trans_deflev VNET(ip6_ah_trans_deflev) @@ -57,23 +59,22 @@ VNET_DECLARE(int, ip6_ipsec_ecn); #define V_ip6_ipsec_ecn VNET(ip6_ipsec_ecn) struct inpcb; +struct secpolicy *ipsec6_checkpolicy(const struct mbuf *, + struct inpcb *, int *); -extern int ipsec6_in_reject __P((struct mbuf *, struct inpcb *)); - -struct ip6_hdr; -extern const char *ipsec6_logpacketstr __P((struct ip6_hdr *, u_int32_t)); - -struct m_tag; -extern int ipsec6_common_input(struct mbuf **mp, int *offp, int proto); -extern int ipsec6_common_input_cb(struct mbuf *m, struct secasvar *sav, - int skip, int protoff, struct m_tag *mt); -extern void esp6_ctlinput(int, struct sockaddr *, void *); +void ipsec6_setsockaddrs(const struct mbuf *, union sockaddr_union *, + union sockaddr_union *); +int ipsec6_input(struct mbuf *, int, int); +int ipsec6_in_reject(const struct mbuf *, struct inpcb *); +int ipsec6_forward(struct mbuf *); +int ipsec6_pcbctl(struct inpcb *, struct sockopt *); +int ipsec6_output(struct mbuf *, 
struct inpcb *); +int ipsec6_capability(struct mbuf *, u_int); +int ipsec6_common_input_cb(struct mbuf *, struct secasvar *, int, int); +int ipsec6_process_packet(struct mbuf *, struct secpolicy *, struct inpcb *); -struct ipsec_output_state; -extern int ipsec6_output_trans __P((struct ipsec_output_state *, u_char *, - struct mbuf *, struct secpolicy *, int, int *)); -extern int ipsec6_output_tunnel __P((struct ipsec_output_state *, - struct secpolicy *, int)); +int ip6_ipsec_filtertunnel(struct mbuf *); +int ip6_ipsec_pcbctl(struct inpcb *, struct sockopt *); #endif /*_KERNEL*/ #endif /*_NETIPSEC_IPSEC6_H_*/ diff --git a/freebsd/sys/netipsec/ipsec_input.c b/freebsd/sys/netipsec/ipsec_input.c index d910de71..50e7d646 100644 --- a/freebsd/sys/netipsec/ipsec_input.c +++ b/freebsd/sys/netipsec/ipsec_input.c @@ -1,6 +1,5 @@ #include -/* $FreeBSD$ */ /* $OpenBSD: ipsec_input.c,v 1.63 2003/02/20 18:35:43 deraadt Exp $ */ /*- * The authors of this code are John Ioannidis (ji@tla.org), @@ -21,6 +20,7 @@ * Copyright (C) 1995, 1996, 1997, 1998, 1999 by John Ioannidis, * Angelos D. Keromytis and Niels Provos. * Copyright (c) 2001, Angelos D. Keromytis. + * Copyright (c) 2016 Andrey V. Elsukov * * Permission to use, copy, and modify this software with or without fee * is hereby granted, provided that this entire notice is included in @@ -42,10 +42,12 @@ * IPsec input processing. 
*/ +#include +__FBSDID("$FreeBSD$"); + #include #include #include -#include #include #include @@ -55,11 +57,12 @@ #include #include #include +#include #include #include -#include -#include +#include +#include #include #include @@ -89,6 +92,7 @@ #include #include +#include #include #include @@ -96,10 +100,6 @@ #include #include -#ifdef DEV_ENC -#include -#endif - #define IPSEC_ISTAT(proto, name) do { \ if ((proto) == IPPROTO_ESP) \ @@ -110,10 +110,6 @@ IPCOMPSTAT_INC(ipcomps_##name); \ } while (0) -#ifdef INET -static void ipsec4_common_ctlinput(int, struct sockaddr *, void *, int); -#endif - /* * ipsec_common_input gets called when an IPsec-protected packet * is received by IPv4 or IPv6. Its job is to find the right SA @@ -123,15 +119,11 @@ static void ipsec4_common_ctlinput(int, struct sockaddr *, void *, int); static int ipsec_common_input(struct mbuf *m, int skip, int protoff, int af, int sproto) { + char buf[IPSEC_ADDRSTRLEN]; union sockaddr_union dst_address; struct secasvar *sav; - u_int32_t spi; + uint32_t spi; int error; -#ifdef INET -#ifdef IPSEC_NAT_T - struct m_tag *tag; -#endif -#endif IPSEC_ISTAT(sproto, input); @@ -183,12 +175,6 @@ ipsec_common_input(struct mbuf *m, int skip, int protoff, int af, int sproto) m_copydata(m, offsetof(struct ip, ip_dst), sizeof(struct in_addr), (caddr_t) &dst_address.sin.sin_addr); -#ifdef IPSEC_NAT_T - /* Find the source port for NAT-T; see udp*_espdecap. 
*/ - tag = m_tag_find(m, PACKET_TAG_IPSEC_NAT_T_PORTS, NULL); - if (tag != NULL) - dst_address.sin.sin_port = ((u_int16_t *)(tag + 1))[1]; -#endif /* IPSEC_NAT_T */ break; #endif /* INET */ #ifdef INET6 @@ -197,6 +183,13 @@ ipsec_common_input(struct mbuf *m, int skip, int protoff, int af, int sproto) m_copydata(m, offsetof(struct ip6_hdr, ip6_dst), sizeof(struct in6_addr), (caddr_t) &dst_address.sin6.sin6_addr); + /* We keep addresses in SADB without embedded scope id */ + if (IN6_IS_SCOPE_LINKLOCAL(&dst_address.sin6.sin6_addr)) { + /* XXX: sa6_recoverscope() */ + dst_address.sin6.sin6_scope_id = + ntohs(dst_address.sin6.sin6_addr.s6_addr16[1]); + dst_address.sin6.sin6_addr.s6_addr16[1] = 0; + } break; #endif /* INET6 */ default: @@ -207,11 +200,11 @@ ipsec_common_input(struct mbuf *m, int skip, int protoff, int af, int sproto) } /* NB: only pass dst since key_allocsa follows RFC2401 */ - sav = KEY_ALLOCSA(&dst_address, sproto, spi); + sav = key_allocsa(&dst_address, sproto, spi); if (sav == NULL) { DPRINTF(("%s: no key association found for SA %s/%08lx/%u\n", - __func__, ipsec_address(&dst_address), - (u_long) ntohl(spi), sproto)); + __func__, ipsec_address(&dst_address, buf, sizeof(buf)), + (u_long) ntohl(spi), sproto)); IPSEC_ISTAT(sproto, notdb); m_freem(m); return ENOENT; @@ -219,10 +212,10 @@ ipsec_common_input(struct mbuf *m, int skip, int protoff, int af, int sproto) if (sav->tdb_xform == NULL) { DPRINTF(("%s: attempted to use uninitialized SA %s/%08lx/%u\n", - __func__, ipsec_address(&dst_address), - (u_long) ntohl(spi), sproto)); + __func__, ipsec_address(&dst_address, buf, sizeof(buf)), + (u_long) ntohl(spi), sproto)); IPSEC_ISTAT(sproto, noxform); - KEY_FREESAV(&sav); + key_freesav(&sav); m_freem(m); return ENXIO; } @@ -232,59 +225,50 @@ ipsec_common_input(struct mbuf *m, int skip, int protoff, int af, int sproto) * everything else. 
*/ error = (*sav->tdb_xform->xf_input)(m, sav, skip, protoff); - KEY_FREESAV(&sav); - return error; + if (error != 0) + key_freesav(&sav); + return (error); } #ifdef INET +extern struct protosw inetsw[]; + /* - * Common input handler for IPv4 AH, ESP, and IPCOMP. + * IPSEC_INPUT() method implementation for IPv4. + * 0 - Permitted by inbound security policy for further processing. + * EACCES - Forbidden by inbound security policy. + * EINPROGRESS - consumed by IPsec. */ int -ipsec4_common_input(struct mbuf *m, ...) -{ - va_list ap; - int off, nxt; - - va_start(ap, m); - off = va_arg(ap, int); - nxt = va_arg(ap, int); - va_end(ap); - - return ipsec_common_input(m, off, offsetof(struct ip, ip_p), - AF_INET, nxt); -} - -void -ah4_input(struct mbuf *m, int off) -{ - ipsec4_common_input(m, off, IPPROTO_AH); -} -void -ah4_ctlinput(int cmd, struct sockaddr *sa, void *v) -{ - if (sa->sa_family == AF_INET && - sa->sa_len == sizeof(struct sockaddr_in)) - ipsec4_common_ctlinput(cmd, sa, v, IPPROTO_AH); -} - -void -esp4_input(struct mbuf *m, int off) -{ - ipsec4_common_input(m, off, IPPROTO_ESP); -} -void -esp4_ctlinput(int cmd, struct sockaddr *sa, void *v) +ipsec4_input(struct mbuf *m, int offset, int proto) { - if (sa->sa_family == AF_INET && - sa->sa_len == sizeof(struct sockaddr_in)) - ipsec4_common_ctlinput(cmd, sa, v, IPPROTO_ESP); -} -void -ipcomp4_input(struct mbuf *m, int off) -{ - ipsec4_common_input(m, off, IPPROTO_IPCOMP); + switch (proto) { + case IPPROTO_AH: + case IPPROTO_ESP: + case IPPROTO_IPCOMP: + /* Do inbound IPsec processing for AH/ESP/IPCOMP */ + ipsec_common_input(m, offset, + offsetof(struct ip, ip_p), AF_INET, proto); + return (EINPROGRESS); /* mbuf consumed by IPsec */ + default: + /* + * Protocols with further headers get their IPsec treatment + * within the protocol specific processing. 
+ */ + if ((inetsw[ip_protox[proto]].pr_flags & PR_LASTHDR) == 0) + return (0); + /* FALLTHROUGH */ + }; + /* + * Enforce IPsec policy checking if we are seeing last header. + */ + if (ipsec4_in_reject(m, NULL) != 0) { + /* Forbidden by inbound security policy */ + m_freem(m); + return (EACCES); + } + return (0); } /* @@ -294,22 +278,17 @@ ipcomp4_input(struct mbuf *m, int off) * the processed packet. */ int -ipsec4_common_input_cb(struct mbuf *m, struct secasvar *sav, - int skip, int protoff, struct m_tag *mt) +ipsec4_common_input_cb(struct mbuf *m, struct secasvar *sav, int skip, + int protoff) { - int prot, af, sproto; - struct ip *ip; - struct m_tag *mtag; - struct tdb_ident *tdbi; + char buf[IPSEC_ADDRSTRLEN]; + struct ipsec_ctx_data ctx; + struct xform_history *xh; struct secasindex *saidx; - int error; -#ifdef INET6 -#ifdef notyet - char ip6buf[INET6_ADDRSTRLEN]; -#endif -#endif + struct m_tag *mtag; + struct ip *ip; + int error, prot, af, sproto, isr_prot; - IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(sav != NULL, ("null SA")); IPSEC_ASSERT(sav->sah != NULL, ("null SAH")); saidx = &sav->sah->saidx; @@ -320,20 +299,14 @@ ipsec4_common_input_cb(struct mbuf *m, struct secasvar *sav, sproto == IPPROTO_IPCOMP, ("unexpected security protocol %u", sproto)); - /* Sanity check */ - if (m == NULL) { - DPRINTF(("%s: null mbuf", __func__)); - IPSEC_ISTAT(sproto, badkcr); - KEY_FREESAV(&sav); - return EINVAL; - } - if (skip != 0) { - /* Fix IPv4 header */ + /* + * Fix IPv4 header + */ if (m->m_len < skip && (m = m_pullup(m, skip)) == NULL) { DPRINTF(("%s: processing failed for SA %s/%08lx\n", - __func__, ipsec_address(&sav->sah->saidx.dst), - (u_long) ntohl(sav->spi))); + __func__, ipsec_address(&sav->sah->saidx.dst, + buf, sizeof(buf)), (u_long) ntohl(sav->spi))); IPSEC_ISTAT(sproto, hdrops); error = ENOBUFS; goto bad; @@ -341,106 +314,67 @@ ipsec4_common_input_cb(struct mbuf *m, struct secasvar *sav, ip = mtod(m, struct ip *); ip->ip_len = 
htons(m->m_pkthdr.len); - ip->ip_off = htons(ip->ip_off); ip->ip_sum = 0; ip->ip_sum = in_cksum(m, ip->ip_hl << 2); } else { ip = mtod(m, struct ip *); } prot = ip->ip_p; + /* + * Check that we have NAT-T enabled and apply transport mode + * decapsulation NAT procedure (RFC3948). + * Do this before invoking into the PFIL. + */ + if (sav->natt != NULL && + (prot == IPPROTO_UDP || prot == IPPROTO_TCP)) + udp_ipsec_adjust_cksum(m, sav, prot, skip); -#ifdef notyet - /* IP-in-IP encapsulation */ - if (prot == IPPROTO_IPIP) { - struct ip ipn; + IPSEC_INIT_CTX(&ctx, &m, sav, AF_INET, IPSEC_ENC_BEFORE); + if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_IN)) != 0) + goto bad; + ip = mtod(m, struct ip *); /* update pointer */ + /* IP-in-IP encapsulation */ + if (prot == IPPROTO_IPIP && + saidx->mode != IPSEC_MODE_TRANSPORT) { if (m->m_pkthdr.len - skip < sizeof(struct ip)) { IPSEC_ISTAT(sproto, hdrops); error = EINVAL; goto bad; } - /* ipn will now contain the inner IPv4 header */ - m_copydata(m, ip->ip_hl << 2, sizeof(struct ip), - (caddr_t) &ipn); - - /* XXX PROXY address isn't recorded in SAH */ - /* - * Check that the inner source address is the same as - * the proxy address, if available. - */ - if ((saidx->proxy.sa.sa_family == AF_INET && - saidx->proxy.sin.sin_addr.s_addr != - INADDR_ANY && - ipn.ip_src.s_addr != - saidx->proxy.sin.sin_addr.s_addr) || - (saidx->proxy.sa.sa_family != AF_INET && - saidx->proxy.sa.sa_family != 0)) { - - DPRINTF(("%s: inner source address %s doesn't " - "correspond to expected proxy source %s, " - "SA %s/%08lx\n", __func__, - inet_ntoa4(ipn.ip_src), - ipsp_address(saidx->proxy), - ipsp_address(saidx->dst), - (u_long) ntohl(sav->spi))); - - IPSEC_ISTAT(sproto, pdrops); - error = EACCES; - goto bad; - } + /* enc0: strip outer IPv4 header */ + m_striphdr(m, 0, ip->ip_hl << 2); } #ifdef INET6 /* IPv6-in-IP encapsulation. 
*/ - if (prot == IPPROTO_IPV6) { - struct ip6_hdr ip6n; - + else if (prot == IPPROTO_IPV6 && + saidx->mode != IPSEC_MODE_TRANSPORT) { if (m->m_pkthdr.len - skip < sizeof(struct ip6_hdr)) { IPSEC_ISTAT(sproto, hdrops); error = EINVAL; goto bad; } - /* ip6n will now contain the inner IPv6 header. */ - m_copydata(m, ip->ip_hl << 2, sizeof(struct ip6_hdr), - (caddr_t) &ip6n); - + /* enc0: strip IPv4 header, keep IPv6 header only */ + m_striphdr(m, 0, ip->ip_hl << 2); + } +#endif /* INET6 */ + else if (prot != IPPROTO_IPV6 && saidx->mode == IPSEC_MODE_ANY) { /* - * Check that the inner source address is the same as - * the proxy address, if available. + * When mode is wildcard, inner protocol is IPv6 and + * we have no INET6 support - drop this packet a bit later. + * In other cases we assume transport mode. Set prot to + * correctly choose netisr. */ - if ((saidx->proxy.sa.sa_family == AF_INET6 && - !IN6_IS_ADDR_UNSPECIFIED(&saidx->proxy.sin6.sin6_addr) && - !IN6_ARE_ADDR_EQUAL(&ip6n.ip6_src, - &saidx->proxy.sin6.sin6_addr)) || - (saidx->proxy.sa.sa_family != AF_INET6 && - saidx->proxy.sa.sa_family != 0)) { - - DPRINTF(("%s: inner source address %s doesn't " - "correspond to expected proxy source %s, " - "SA %s/%08lx\n", __func__, - ip6_sprintf(ip6buf, &ip6n.ip6_src), - ipsec_address(&saidx->proxy), - ipsec_address(&saidx->dst), - (u_long) ntohl(sav->spi))); - - IPSEC_ISTAT(sproto, pdrops); - error = EACCES; - goto bad; - } + prot = IPPROTO_IPIP; } -#endif /* INET6 */ -#endif /*XXX*/ /* * Record what we've done to the packet (under what SA it was - * processed). If we've been passed an mtag, it means the packet - * was already processed by an ethernet/crypto combo card and - * thus has a tag attached with all the right information, but - * with a PACKET_TAG_IPSEC_IN_CRYPTO_DONE as opposed to - * PACKET_TAG_IPSEC_IN_DONE type; in that case, just change the type. + * processed). 
*/ - if (mt == NULL && sproto != IPPROTO_IPCOMP) { + if (sproto != IPPROTO_IPCOMP) { mtag = m_tag_get(PACKET_TAG_IPSEC_IN_DONE, - sizeof(struct tdb_ident), M_NOWAIT); + sizeof(struct xform_history), M_NOWAIT); if (mtag == NULL) { DPRINTF(("%s: failed to get tag\n", __func__)); IPSEC_ISTAT(sproto, hdrops); @@ -448,104 +382,106 @@ ipsec4_common_input_cb(struct mbuf *m, struct secasvar *sav, goto bad; } - tdbi = (struct tdb_ident *)(mtag + 1); - bcopy(&saidx->dst, &tdbi->dst, saidx->dst.sa.sa_len); - tdbi->proto = sproto; - tdbi->spi = sav->spi; - /* Cache those two for enc(4) in xform_ipip. */ - tdbi->alg_auth = sav->alg_auth; - tdbi->alg_enc = sav->alg_enc; - + xh = (struct xform_history *)(mtag + 1); + bcopy(&saidx->dst, &xh->dst, saidx->dst.sa.sa_len); + xh->spi = sav->spi; + xh->proto = sproto; + xh->mode = saidx->mode; m_tag_prepend(m, mtag); - } else if (mt != NULL) { - mt->m_tag_id = PACKET_TAG_IPSEC_IN_DONE; - /* XXX do we need to mark m_flags??? */ } key_sa_recordxfer(sav, m); /* record data transfer */ - m_addr_changed(m); - -#ifdef DEV_ENC - encif->if_ipackets++; - encif->if_ibytes += m->m_pkthdr.len; - /* - * Pass the mbuf to enc0 for bpf and pfil. We will filter the IPIP - * packet later after it has been decapsulated. + * In transport mode requeue decrypted mbuf back to IPv4 protocol + * handler. This is necessary to correctly expose rcvif. */ - ipsec_bpf(m, sav, AF_INET, ENC_IN|ENC_BEFORE); - - if (prot != IPPROTO_IPIP) - if ((error = ipsec_filter(&m, PFIL_IN, ENC_IN|ENC_BEFORE)) != 0) - return (error); -#endif - + if (saidx->mode == IPSEC_MODE_TRANSPORT) + prot = IPPROTO_IPIP; /* * Re-dispatch via software interrupt. 
*/ - if ((error = netisr_queue_src(NETISR_IP, (uintptr_t)sav->spi, m))) { - IPSEC_ISTAT(sproto, qfull); - DPRINTF(("%s: queue full; proto %u packet dropped\n", - __func__, sproto)); - return error; + switch (prot) { + case IPPROTO_IPIP: + isr_prot = NETISR_IP; + af = AF_INET; + break; +#ifdef INET6 + case IPPROTO_IPV6: + isr_prot = NETISR_IPV6; + af = AF_INET6; + break; +#endif + default: + DPRINTF(("%s: cannot handle inner ip proto %d\n", + __func__, prot)); + IPSEC_ISTAT(sproto, nopf); + error = EPFNOSUPPORT; + goto bad; } - return 0; -bad: - m_freem(m); - return error; -} -void -ipsec4_common_ctlinput(int cmd, struct sockaddr *sa, void *v, int proto) -{ - /* XXX nothing just yet */ + IPSEC_INIT_CTX(&ctx, &m, sav, af, IPSEC_ENC_AFTER); + if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_IN)) != 0) + goto bad; + + /* Handle virtual tunneling interfaces */ + if (saidx->mode == IPSEC_MODE_TUNNEL) + error = ipsec_if_input(m, sav, af); + if (error == 0) { + error = netisr_queue_src(isr_prot, (uintptr_t)sav->spi, m); + if (error) { + IPSEC_ISTAT(sproto, qfull); + DPRINTF(("%s: queue full; proto %u packet dropped\n", + __func__, sproto)); + } + } + key_freesav(&sav); + return (error); +bad: + key_freesav(&sav); + if (m != NULL) + m_freem(m); + return (error); } #endif /* INET */ #ifdef INET6 -/* IPv6 AH wrapper. */ +/* + * IPSEC_INPUT() method implementation for IPv6. + * 0 - Permitted by inbound security policy for further processing. + * EACCES - Forbidden by inbound security policy. + * EINPROGRESS - consumed by IPsec. + */ int -ipsec6_common_input(struct mbuf **mp, int *offp, int proto) +ipsec6_input(struct mbuf *m, int offset, int proto) { - int l = 0; - int protoff; - struct ip6_ext ip6e; - - if (*offp < sizeof(struct ip6_hdr)) { - DPRINTF(("%s: bad offset %u\n", __func__, *offp)); - return IPPROTO_DONE; - } else if (*offp == sizeof(struct ip6_hdr)) { - protoff = offsetof(struct ip6_hdr, ip6_nxt); - } else { - /* Chase down the header chain... 
*/ - protoff = sizeof(struct ip6_hdr); - - do { - protoff += l; - m_copydata(*mp, protoff, sizeof(ip6e), - (caddr_t) &ip6e); - - if (ip6e.ip6e_nxt == IPPROTO_AH) - l = (ip6e.ip6e_len + 2) << 2; - else - l = (ip6e.ip6e_len + 1) << 3; - IPSEC_ASSERT(l > 0, ("l went zero or negative")); - } while (protoff + l < *offp); - - /* Malformed packet check */ - if (protoff + l != *offp) { - DPRINTF(("%s: bad packet header chain, protoff %u, " - "l %u, off %u\n", __func__, protoff, l, *offp)); - IPSEC_ISTAT(proto, hdrops); - m_freem(*mp); - *mp = NULL; - return IPPROTO_DONE; - } - protoff += offsetof(struct ip6_ext, ip6e_nxt); + + switch (proto) { + case IPPROTO_AH: + case IPPROTO_ESP: + case IPPROTO_IPCOMP: + /* Do inbound IPsec processing for AH/ESP/IPCOMP */ + ipsec_common_input(m, offset, + offsetof(struct ip6_hdr, ip6_nxt), AF_INET6, proto); + return (EINPROGRESS); /* mbuf consumed by IPsec */ + default: + /* + * Protocols with further headers get their IPsec treatment + * within the protocol specific processing. + */ + if ((inet6sw[ip6_protox[proto]].pr_flags & PR_LASTHDR) == 0) + return (0); + /* FALLTHROUGH */ + }; + /* + * Enforce IPsec policy checking if we are seeing last header. + */ + if (ipsec6_in_reject(m, NULL) != 0) { + /* Forbidden by inbound security policy */ + m_freem(m); + return (EACCES); } - (void) ipsec_common_input(*mp, *offp, protoff, AF_INET6, proto); - return IPPROTO_DONE; + return (0); } /* @@ -553,22 +489,20 @@ ipsec6_common_input(struct mbuf **mp, int *offp, int proto) * filtering and other sanity checks on the processed packet. 
*/ int -ipsec6_common_input_cb(struct mbuf *m, struct secasvar *sav, int skip, int protoff, - struct m_tag *mt) +ipsec6_common_input_cb(struct mbuf *m, struct secasvar *sav, int skip, + int protoff) { - int prot, af, sproto; + char buf[IPSEC_ADDRSTRLEN]; + struct ipsec_ctx_data ctx; + struct xform_history *xh; + struct secasindex *saidx; struct ip6_hdr *ip6; struct m_tag *mtag; - struct tdb_ident *tdbi; - struct secasindex *saidx; - int nxt; - u_int8_t nxt8; + int prot, af, sproto; + int nxt, isr_prot; int error, nest; -#ifdef notyet - char ip6buf[INET6_ADDRSTRLEN]; -#endif + uint8_t nxt8; - IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(sav != NULL, ("null SA")); IPSEC_ASSERT(sav->sah != NULL, ("null SAH")); saidx = &sav->sah->saidx; @@ -579,122 +513,67 @@ ipsec6_common_input_cb(struct mbuf *m, struct secasvar *sav, int skip, int proto sproto == IPPROTO_IPCOMP, ("unexpected security protocol %u", sproto)); - /* Sanity check */ - if (m == NULL) { - DPRINTF(("%s: null mbuf", __func__)); - IPSEC_ISTAT(sproto, badkcr); - error = EINVAL; - goto bad; - } - /* Fix IPv6 header */ if (m->m_len < sizeof(struct ip6_hdr) && (m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) { DPRINTF(("%s: processing failed for SA %s/%08lx\n", - __func__, ipsec_address(&sav->sah->saidx.dst), - (u_long) ntohl(sav->spi))); + __func__, ipsec_address(&sav->sah->saidx.dst, buf, + sizeof(buf)), (u_long) ntohl(sav->spi))); IPSEC_ISTAT(sproto, hdrops); error = EACCES; goto bad; } + IPSEC_INIT_CTX(&ctx, &m, sav, af, IPSEC_ENC_BEFORE); + if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_IN)) != 0) + goto bad; + ip6 = mtod(m, struct ip6_hdr *); ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(struct ip6_hdr)); /* Save protocol */ - m_copydata(m, protoff, 1, (unsigned char *) &prot); + m_copydata(m, protoff, 1, &nxt8); + prot = nxt8; -#ifdef notyet + /* IPv6-in-IP encapsulation */ + if (prot == IPPROTO_IPV6 && + saidx->mode != IPSEC_MODE_TRANSPORT) { + if (m->m_pkthdr.len - skip < sizeof(struct 
ip6_hdr)) { + IPSEC_ISTAT(sproto, hdrops); + error = EINVAL; + goto bad; + } + /* ip6n will now contain the inner IPv6 header. */ + m_striphdr(m, 0, skip); + skip = 0; + } #ifdef INET /* IP-in-IP encapsulation */ - if (prot == IPPROTO_IPIP) { - struct ip ipn; - + else if (prot == IPPROTO_IPIP && + saidx->mode != IPSEC_MODE_TRANSPORT) { if (m->m_pkthdr.len - skip < sizeof(struct ip)) { IPSEC_ISTAT(sproto, hdrops); error = EINVAL; goto bad; } /* ipn will now contain the inner IPv4 header */ - m_copydata(m, skip, sizeof(struct ip), (caddr_t) &ipn); - - /* - * Check that the inner source address is the same as - * the proxy address, if available. - */ - if ((saidx->proxy.sa.sa_family == AF_INET && - saidx->proxy.sin.sin_addr.s_addr != INADDR_ANY && - ipn.ip_src.s_addr != saidx->proxy.sin.sin_addr.s_addr) || - (saidx->proxy.sa.sa_family != AF_INET && - saidx->proxy.sa.sa_family != 0)) { - - DPRINTF(("%s: inner source address %s doesn't " - "correspond to expected proxy source %s, " - "SA %s/%08lx\n", __func__, - inet_ntoa4(ipn.ip_src), - ipsec_address(&saidx->proxy), - ipsec_address(&saidx->dst), - (u_long) ntohl(sav->spi))); - - IPSEC_ISTAT(sproto, pdrops); - error = EACCES; - goto bad; - } + m_striphdr(m, 0, skip); + skip = 0; } #endif /* INET */ - - /* IPv6-in-IP encapsulation */ - if (prot == IPPROTO_IPV6) { - struct ip6_hdr ip6n; - - if (m->m_pkthdr.len - skip < sizeof(struct ip6_hdr)) { - IPSEC_ISTAT(sproto, hdrops); - error = EINVAL; - goto bad; - } - /* ip6n will now contain the inner IPv6 header. */ - m_copydata(m, skip, sizeof(struct ip6_hdr), - (caddr_t) &ip6n); - - /* - * Check that the inner source address is the same as - * the proxy address, if available. 
- */ - if ((saidx->proxy.sa.sa_family == AF_INET6 && - !IN6_IS_ADDR_UNSPECIFIED(&saidx->proxy.sin6.sin6_addr) && - !IN6_ARE_ADDR_EQUAL(&ip6n.ip6_src, - &saidx->proxy.sin6.sin6_addr)) || - (saidx->proxy.sa.sa_family != AF_INET6 && - saidx->proxy.sa.sa_family != 0)) { - - DPRINTF(("%s: inner source address %s doesn't " - "correspond to expected proxy source %s, " - "SA %s/%08lx\n", __func__, - ip6_sprintf(ip6buf, &ip6n.ip6_src), - ipsec_address(&saidx->proxy), - ipsec_address(&saidx->dst), - (u_long) ntohl(sav->spi))); - - IPSEC_ISTAT(sproto, pdrops); - error = EACCES; - goto bad; - } + else { + prot = IPPROTO_IPV6; /* for correct BPF processing */ } -#endif /*XXX*/ /* * Record what we've done to the packet (under what SA it was - * processed). If we've been passed an mtag, it means the packet - * was already processed by an ethernet/crypto combo card and - * thus has a tag attached with all the right information, but - * with a PACKET_TAG_IPSEC_IN_CRYPTO_DONE as opposed to - * PACKET_TAG_IPSEC_IN_DONE type; in that case, just change the type. + * processed). */ - if (mt == NULL && sproto != IPPROTO_IPCOMP) { + if (sproto != IPPROTO_IPCOMP) { mtag = m_tag_get(PACKET_TAG_IPSEC_IN_DONE, - sizeof(struct tdb_ident), M_NOWAIT); + sizeof(struct xform_history), M_NOWAIT); if (mtag == NULL) { DPRINTF(("%s: failed to get tag\n", __func__)); IPSEC_ISTAT(sproto, hdrops); @@ -702,42 +581,61 @@ ipsec6_common_input_cb(struct mbuf *m, struct secasvar *sav, int skip, int proto goto bad; } - tdbi = (struct tdb_ident *)(mtag + 1); - bcopy(&saidx->dst, &tdbi->dst, sizeof(union sockaddr_union)); - tdbi->proto = sproto; - tdbi->spi = sav->spi; - /* Cache those two for enc(4) in xform_ipip. 
*/ - tdbi->alg_auth = sav->alg_auth; - tdbi->alg_enc = sav->alg_enc; - + xh = (struct xform_history *)(mtag + 1); + bcopy(&saidx->dst, &xh->dst, saidx->dst.sa.sa_len); + xh->spi = sav->spi; + xh->proto = sproto; + xh->mode = saidx->mode; m_tag_prepend(m, mtag); - } else { - if (mt != NULL) - mt->m_tag_id = PACKET_TAG_IPSEC_IN_DONE; - /* XXX do we need to mark m_flags??? */ } key_sa_recordxfer(sav, m); -#ifdef DEV_ENC - encif->if_ipackets++; - encif->if_ibytes += m->m_pkthdr.len; - - /* - * Pass the mbuf to enc0 for bpf and pfil. We will filter the IPIP - * packet later after it has been decapsulated. - */ - ipsec_bpf(m, sav, AF_INET6, ENC_IN|ENC_BEFORE); - - /* XXX-BZ does not make sense. */ - if (prot != IPPROTO_IPIP) - if ((error = ipsec_filter(&m, PFIL_IN, ENC_IN|ENC_BEFORE)) != 0) - return (error); +#ifdef INET + if (prot == IPPROTO_IPIP) + af = AF_INET; + else #endif - - /* Retrieve new protocol */ - m_copydata(m, protoff, sizeof(u_int8_t), (caddr_t) &nxt8); - + af = AF_INET6; + IPSEC_INIT_CTX(&ctx, &m, sav, af, IPSEC_ENC_AFTER); + if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_IN)) != 0) + goto bad; + if (skip == 0) { + /* + * We stripped outer IPv6 header. + * Now we should requeue decrypted packet via netisr. + */ + switch (prot) { +#ifdef INET + case IPPROTO_IPIP: + isr_prot = NETISR_IP; + break; +#endif + case IPPROTO_IPV6: + isr_prot = NETISR_IPV6; + break; + default: + DPRINTF(("%s: cannot handle inner ip proto %d\n", + __func__, prot)); + IPSEC_ISTAT(sproto, nopf); + error = EPFNOSUPPORT; + goto bad; + } + /* Handle virtual tunneling interfaces */ + if (saidx->mode == IPSEC_MODE_TUNNEL) + error = ipsec_if_input(m, sav, af); + if (error == 0) { + error = netisr_queue_src(isr_prot, + (uintptr_t)sav->spi, m); + if (error) { + IPSEC_ISTAT(sproto, qfull); + DPRINTF(("%s: queue full; proto %u packet" + " dropped\n", __func__, sproto)); + } + } + key_freesav(&sav); + return (error); + } /* * See the end of ip6_input for this logic. 
* IPPROTO_IPV[46] case will be processed just like other ones @@ -773,99 +671,12 @@ ipsec6_common_input_cb(struct mbuf *m, struct secasvar *sav, int skip, int proto } nxt = (*inet6sw[ip6_protox[nxt]].pr_input)(&m, &skip, nxt); } - return 0; + key_freesav(&sav); + return (0); bad: + key_freesav(&sav); if (m) m_freem(m); - return error; -} - -void -esp6_ctlinput(int cmd, struct sockaddr *sa, void *d) -{ - struct ip6ctlparam *ip6cp = NULL; - struct mbuf *m = NULL; - struct ip6_hdr *ip6; - int off; - - if (sa->sa_family != AF_INET6 || - sa->sa_len != sizeof(struct sockaddr_in6)) - return; - if ((unsigned)cmd >= PRC_NCMDS) - return; - - /* if the parameter is from icmp6, decode it. */ - if (d != NULL) { - ip6cp = (struct ip6ctlparam *)d; - m = ip6cp->ip6c_m; - ip6 = ip6cp->ip6c_ip6; - off = ip6cp->ip6c_off; - } else { - m = NULL; - ip6 = NULL; - off = 0; /* calm gcc */ - } - - if (ip6 != NULL) { - - struct ip6ctlparam ip6cp1; - - /* - * Notify the error to all possible sockets via pfctlinput2. - * Since the upper layer information (such as protocol type, - * source and destination ports) is embedded in the encrypted - * data and might have been cut, we can't directly call - * an upper layer ctlinput function. However, the pcbnotify - * function will consider source and destination addresses - * as well as the flow info value, and may be able to find - * some PCB that should be notified. - * Although pfctlinput2 will call esp6_ctlinput(), there is - * no possibility of an infinite loop of function calls, - * because we don't pass the inner IPv6 header. - */ - bzero(&ip6cp1, sizeof(ip6cp1)); - ip6cp1.ip6c_src = ip6cp->ip6c_src; - pfctlinput2(cmd, sa, (void *)&ip6cp1); - - /* - * Then go to special cases that need ESP header information. - * XXX: We assume that when ip6 is non NULL, - * M and OFF are valid. 
- */ - - if (cmd == PRC_MSGSIZE) { - struct secasvar *sav; - u_int32_t spi; - int valid; - - /* check header length before using m_copydata */ - if (m->m_pkthdr.len < off + sizeof (struct esp)) - return; - m_copydata(m, off + offsetof(struct esp, esp_spi), - sizeof(u_int32_t), (caddr_t) &spi); - /* - * Check to see if we have a valid SA corresponding to - * the address in the ICMP message payload. - */ - sav = KEY_ALLOCSA((union sockaddr_union *)sa, - IPPROTO_ESP, spi); - valid = (sav != NULL); - if (sav) - KEY_FREESAV(&sav); - - /* XXX Further validation? */ - - /* - * Depending on whether the SA is "valid" and - * routing table size (mtudisc_{hi,lo}wat), we will: - * - recalcurate the new MTU and create the - * corresponding routing entry, or - * - ignore the MTU change notification. - */ - icmp6_mtudisc_update(ip6cp, valid); - } - } else { - /* we normally notify any pcb here */ - } + return (error); } #endif /* INET6 */ diff --git a/freebsd/sys/netipsec/ipsec_mbuf.c b/freebsd/sys/netipsec/ipsec_mbuf.c index 2cafe058..d81c0deb 100644 --- a/freebsd/sys/netipsec/ipsec_mbuf.c +++ b/freebsd/sys/netipsec/ipsec_mbuf.c @@ -32,18 +32,14 @@ * IPsec-specific mbuf routines. */ -#include - #include #include +#include #include #include -#include #include - #include - #include /* @@ -76,7 +72,21 @@ m_makespace(struct mbuf *m0, int skip, int hlen, int *off) * the contents of m as needed. */ remain = m->m_len - skip; /* data to move */ - if (hlen > M_TRAILINGSPACE(m)) { + if (remain > skip && + hlen + max_linkhdr < M_LEADINGSPACE(m)) { + /* + * mbuf has enough free space at the beginning. + * XXX: which operation is the most heavy - copying of + * possible several hundred of bytes or allocation + * of new mbuf? We can remove max_linkhdr check + * here, but it is possible that this will lead + * to allocation of new mbuf in Layer 2 code. 
+ */ + m->m_data -= hlen; + bcopy(mtodo(m, hlen), mtod(m, caddr_t), skip); + m->m_len += hlen; + *off = skip; + } else if (hlen > M_TRAILINGSPACE(m)) { struct mbuf *n0, *n, **np; int todo, len, done, alloc; @@ -87,11 +97,11 @@ m_makespace(struct mbuf *m0, int skip, int hlen, int *off) todo = remain; while (todo > 0) { if (todo > MHLEN) { - n = m_getcl(M_DONTWAIT, m->m_type, 0); + n = m_getcl(M_NOWAIT, m->m_type, 0); len = MCLBYTES; } else { - n = m_get(M_DONTWAIT, m->m_type); + n = m_get(M_NOWAIT, m->m_type); len = MHLEN; } if (n == NULL) { @@ -117,7 +127,7 @@ m_makespace(struct mbuf *m0, int skip, int hlen, int *off) } } else { - n = m_get(M_DONTWAIT, m->m_type); + n = m_get(M_NOWAIT, m->m_type); if (n == NULL) { m_freem(n0); return NULL; @@ -144,7 +154,7 @@ m_makespace(struct mbuf *m0, int skip, int hlen, int *off) * so there's space to write the new header. */ bcopy(mtod(m, caddr_t) + skip, - mtod(m, caddr_t) + skip + hlen, remain); + mtod(m, caddr_t) + skip + hlen, remain); m->m_len += hlen; *off = skip; } @@ -205,8 +215,8 @@ m_pad(struct mbuf *m, int n) if (pad > M_TRAILINGSPACE(m0)) { /* Add an mbuf to the chain. */ - MGET(m1, M_DONTWAIT, MT_DATA); - if (m1 == 0) { + MGET(m1, M_NOWAIT, MT_DATA); + if (m1 == NULL) { m_freem(m0); DPRINTF(("%s: unable to get extra mbuf\n", __func__)); return NULL; diff --git a/freebsd/sys/netipsec/ipsec_mod.c b/freebsd/sys/netipsec/ipsec_mod.c new file mode 100644 index 00000000..78b9f1da --- /dev/null +++ b/freebsd/sys/netipsec/ipsec_mod.c @@ -0,0 +1,150 @@ +#include + +/*- + * Copyright (c) 2016 Andrey V. Elsukov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +#include + +#ifdef INET +static const struct ipsec_methods ipv4_methods = { + .input = ipsec4_input, + .forward = ipsec4_forward, + .output = ipsec4_output, + .pcbctl = ipsec4_pcbctl, + .capability = ipsec4_capability, + .check_policy = ipsec4_in_reject, + .hdrsize = ipsec_hdrsiz_inpcb, + .udp_input = udp_ipsec_input, + .udp_pcbctl = udp_ipsec_pcbctl, +}; +#ifndef KLD_MODULE +static const struct ipsec_support ipv4_ipsec = { + .enabled = IPSEC_MODULE_ENABLED, + .methods = &ipv4_methods +}; +const struct ipsec_support * const ipv4_ipsec_support = &ipv4_ipsec; +#endif /* !KLD_MODULE */ +#endif /* INET */ + +#ifdef INET6 +static const struct ipsec_methods ipv6_methods = { + .input = ipsec6_input, + .forward = ipsec6_forward, + .output = ipsec6_output, + .pcbctl 
= ipsec6_pcbctl, + .capability = ipsec6_capability, + .check_policy = ipsec6_in_reject, + .hdrsize = ipsec_hdrsiz_inpcb, +}; +#ifndef KLD_MODULE +static const struct ipsec_support ipv6_ipsec = { + .enabled = IPSEC_MODULE_ENABLED, + .methods = &ipv6_methods +}; +const struct ipsec_support * const ipv6_ipsec_support = &ipv6_ipsec; +#endif /* !KLD_MODULE */ +#endif /* INET6 */ + +/* + * Always register ipsec module. + * Even when IPsec is build in the kernel, we need to have + * module registered. This will prevent to load ipsec.ko. + */ +static int +ipsec_modevent(module_t mod, int type, void *data) +{ + + switch (type) { + case MOD_LOAD: + /* All xforms are registered via SYSINIT */ + if (!ipsec_initialized()) + return (ENOMEM); +#ifdef KLD_MODULE +#ifdef INET + ipsec_support_enable(ipv4_ipsec_support, &ipv4_methods); +#endif +#ifdef INET6 + ipsec_support_enable(ipv6_ipsec_support, &ipv6_methods); +#endif +#endif /* KLD_MODULE */ + break; + case MOD_UNLOAD: + /* All xforms are unregistered via SYSUNINIT */ +#ifdef KLD_MODULE +#ifdef INET + ipsec_support_disable(ipv4_ipsec_support); +#endif +#ifdef INET6 + ipsec_support_disable(ipv6_ipsec_support); +#endif +#endif /* KLD_MODULE */ + break; + default: + return (EOPNOTSUPP); + } + return (0); +} + +static moduledata_t ipsec_mod = { + "ipsec", + ipsec_modevent, + 0 +}; + +DECLARE_MODULE(ipsec, ipsec_mod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY); +MODULE_VERSION(ipsec, 1); +#ifdef KLD_MODULE +MODULE_DEPEND(ipsec, ipsec_support, 1, 1, 1); +#endif diff --git a/freebsd/sys/netipsec/ipsec_output.c b/freebsd/sys/netipsec/ipsec_output.c index a02b6ce2..3403b0bc 100644 --- a/freebsd/sys/netipsec/ipsec_output.c +++ b/freebsd/sys/netipsec/ipsec_output.c @@ -2,6 +2,7 @@ /*- * Copyright (c) 2002, 2003 Sam Leffler, Errno Consulting + * Copyright (c) 2016 Andrey V. Elsukov * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -34,7 +35,7 @@ #include #include #include -#include +#include #include #include @@ -43,11 +44,12 @@ #include #include #include +#include #include #include -#include -#include +#include +#include #include #include @@ -63,12 +65,19 @@ #include #ifdef INET6 #include +#include #endif #include #ifdef INET6 #include #endif +#ifdef SCTP +#include +#endif +#include +#include +#include #include #ifdef INET6 #include @@ -85,834 +94,880 @@ #include -#ifdef IPSEC_NAT_T -#include -#endif - -#ifdef DEV_ENC -#include -#endif +#define IPSEC_OSTAT_INC(proto, name) do { \ + if ((proto) == IPPROTO_ESP) \ + ESPSTAT_INC(esps_##name); \ + else if ((proto) == IPPROTO_AH)\ + AHSTAT_INC(ahs_##name); \ + else \ + IPCOMPSTAT_INC(ipcomps_##name); \ +} while (0) +static int ipsec_encap(struct mbuf **mp, struct secasindex *saidx); -int -ipsec_process_done(struct mbuf *m, struct ipsecrequest *isr) +#ifdef INET +static struct secasvar * +ipsec4_allocsa(struct mbuf *m, struct secpolicy *sp, u_int *pidx, int *error) { - struct tdb_ident *tdbi; - struct m_tag *mtag; + struct secasindex *saidx, tmpsaidx; + struct ipsecrequest *isr; + struct sockaddr_in *sin; struct secasvar *sav; - struct secasindex *saidx; - int error; - - IPSEC_ASSERT(m != NULL, ("null mbuf")); - IPSEC_ASSERT(isr != NULL, ("null ISR")); - sav = isr->sav; - IPSEC_ASSERT(sav != NULL, ("null SA")); - IPSEC_ASSERT(sav->sah != NULL, ("null SAH")); + struct ip *ip; - saidx = &sav->sah->saidx; - switch (saidx->dst.sa.sa_family) { -#ifdef INET - case AF_INET: - /* Fix the header length, for AH processing. */ - mtod(m, struct ip *)->ip_len = htons(m->m_pkthdr.len); - break; -#endif /* INET */ -#ifdef INET6 - case AF_INET6: - /* Fix the header length, for AH processing. */ - if (m->m_pkthdr.len < sizeof (struct ip6_hdr)) { - error = ENXIO; - goto bad; + /* + * Check system global policy controls. 
+ */ +next: + isr = sp->req[*pidx]; + if ((isr->saidx.proto == IPPROTO_ESP && !V_esp_enable) || + (isr->saidx.proto == IPPROTO_AH && !V_ah_enable) || + (isr->saidx.proto == IPPROTO_IPCOMP && !V_ipcomp_enable)) { + DPRINTF(("%s: IPsec outbound packet dropped due" + " to policy (check your sysctls)\n", __func__)); + IPSEC_OSTAT_INC(isr->saidx.proto, pdrops); + *error = EHOSTUNREACH; + return (NULL); + } + /* + * Craft SA index to search for proper SA. Note that + * we only initialize unspecified SA peers for transport + * mode; for tunnel mode they must already be filled in. + */ + if (isr->saidx.mode == IPSEC_MODE_TRANSPORT) { + saidx = &tmpsaidx; + *saidx = isr->saidx; + ip = mtod(m, struct ip *); + if (saidx->src.sa.sa_len == 0) { + sin = &saidx->src.sin; + sin->sin_len = sizeof(*sin); + sin->sin_family = AF_INET; + sin->sin_port = IPSEC_PORT_ANY; + sin->sin_addr = ip->ip_src; } - if (m->m_pkthdr.len - sizeof (struct ip6_hdr) > IPV6_MAXPACKET) { - /* No jumbogram support. */ - error = ENXIO; /*?*/ - goto bad; + if (saidx->dst.sa.sa_len == 0) { + sin = &saidx->dst.sin; + sin->sin_len = sizeof(*sin); + sin->sin_family = AF_INET; + sin->sin_port = IPSEC_PORT_ANY; + sin->sin_addr = ip->ip_dst; } - mtod(m, struct ip6_hdr *)->ip6_plen = - htons(m->m_pkthdr.len - sizeof(struct ip6_hdr)); - break; -#endif /* INET6 */ - default: - DPRINTF(("%s: unknown protocol family %u\n", __func__, - saidx->dst.sa.sa_family)); - error = ENXIO; - goto bad; + } else + saidx = &sp->req[*pidx]->saidx; + /* + * Lookup SA and validate it. + */ + sav = key_allocsa_policy(sp, saidx, error); + if (sav == NULL) { + IPSECSTAT_INC(ips_out_nosa); + if (*error != 0) + return (NULL); + if (ipsec_get_reqlevel(sp, *pidx) != IPSEC_LEVEL_REQUIRE) { + /* + * We have no SA and policy that doesn't require + * this IPsec transform, thus we can continue w/o + * IPsec processing, i.e. return EJUSTRETURN. + * But first check if there is some bundled transform. 
+ */ + if (sp->tcount > ++(*pidx)) + goto next; + *error = EJUSTRETURN; + } + return (NULL); } + IPSEC_ASSERT(sav->tdb_xform != NULL, ("SA with NULL tdb_xform")); + return (sav); +} + +/* + * IPsec output logic for IPv4. + */ +static int +ipsec4_perform_request(struct mbuf *m, struct secpolicy *sp, u_int idx) +{ + char sbuf[IPSEC_ADDRSTRLEN], dbuf[IPSEC_ADDRSTRLEN]; + struct ipsec_ctx_data ctx; + union sockaddr_union *dst; + struct secasvar *sav; + struct ip *ip; + int error, i, off; + + IPSEC_ASSERT(idx < sp->tcount, ("Wrong IPsec request index %d", idx)); /* - * Add a record of what we've done or what needs to be done to the - * packet. + * We hold the reference to SP. Content of SP couldn't be changed. + * Craft secasindex and do lookup for suitable SA. + * Then do encapsulation if needed and call xform's output. + * We need to store SP in the xform callback parameters. + * In xform callback we will extract SP and it can be used to + * determine next transform. At the end of transform we can + * release reference to SP. */ - mtag = m_tag_get(PACKET_TAG_IPSEC_OUT_DONE, - sizeof(struct tdb_ident), M_NOWAIT); - if (mtag == NULL) { - DPRINTF(("%s: could not get packet tag\n", __func__)); - error = ENOMEM; + sav = ipsec4_allocsa(m, sp, &idx, &error); + if (sav == NULL) { + if (error == EJUSTRETURN) { /* No IPsec required */ + key_freesp(&sp); + return (error); + } goto bad; } - - tdbi = (struct tdb_ident *)(mtag + 1); - tdbi->dst = saidx->dst; - tdbi->proto = saidx->proto; - tdbi->spi = sav->spi; - m_tag_prepend(m, mtag); - /* - * If there's another (bundled) SA to apply, do so. - * Note that this puts a burden on the kernel stack size. - * If this is a problem we'll need to introduce a queue - * to set the packet on so we can unwind the stack before - * doing further processing. + * XXXAE: most likely ip_sum at this point is wrong. */ - if (isr->next) { - IPSECSTAT_INC(ips_out_bundlesa); - /* XXX-BZ currently only support same AF bundles. 
*/ - switch (saidx->dst.sa.sa_family) { -#ifdef INET - case AF_INET: - return ipsec4_process_packet(m, isr->next, 0, 0); - /* NOTREACHED */ -#endif -#ifdef notyet -#ifdef INET6 - case AF_INET6: - /* XXX */ - ipsec6_output_trans() - ipsec6_output_tunnel() - /* NOTREACHED */ -#endif /* INET6 */ -#endif - default: - DPRINTF(("%s: unknown protocol family %u\n", __func__, - saidx->dst.sa.sa_family)); - error = ENXIO; + IPSEC_INIT_CTX(&ctx, &m, sav, AF_INET, IPSEC_ENC_BEFORE); + if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_OUT)) != 0) + goto bad; + + ip = mtod(m, struct ip *); + dst = &sav->sah->saidx.dst; + /* Do the appropriate encapsulation, if necessary */ + if (sp->req[idx]->saidx.mode == IPSEC_MODE_TUNNEL || /* Tunnel requ'd */ + dst->sa.sa_family != AF_INET || /* PF mismatch */ + (dst->sa.sa_family == AF_INET && /* Proxy */ + dst->sin.sin_addr.s_addr != INADDR_ANY && + dst->sin.sin_addr.s_addr != ip->ip_dst.s_addr)) { + /* Fix IPv4 header checksum and length */ + ip->ip_len = htons(m->m_pkthdr.len); + ip->ip_sum = 0; + ip->ip_sum = in_cksum(m, ip->ip_hl << 2); + error = ipsec_encap(&m, &sav->sah->saidx); + if (error != 0) { + DPRINTF(("%s: encapsulation for SA %s->%s " + "SPI 0x%08x failed with error %d\n", __func__, + ipsec_address(&sav->sah->saidx.src, sbuf, + sizeof(sbuf)), + ipsec_address(&sav->sah->saidx.dst, dbuf, + sizeof(dbuf)), ntohl(sav->spi), error)); + /* XXXAE: IPSEC_OSTAT_INC(tunnel); */ goto bad; } } - key_sa_recordxfer(sav, m); /* record data transfer */ - m_addr_changed(m); + IPSEC_INIT_CTX(&ctx, &m, sav, dst->sa.sa_family, IPSEC_ENC_AFTER); + if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_OUT)) != 0) + goto bad; /* - * We're done with IPsec processing, transmit the packet using the - * appropriate network protocol (IP or IPv6). SPD lookup will be - * performed again there. + * Dispatch to the appropriate IPsec transform logic. The + * packet will be returned for transmission after crypto + * processing, etc. are completed. 
+ * + * NB: m & sav are ``passed to caller'' who's responsible for + * reclaiming their resources. */ - switch (saidx->dst.sa.sa_family) { -#ifdef INET - struct ip *ip; + switch(dst->sa.sa_family) { case AF_INET: ip = mtod(m, struct ip *); - ip->ip_len = ntohs(ip->ip_len); - ip->ip_off = ntohs(ip->ip_off); - -#ifdef IPSEC_NAT_T - /* - * If NAT-T is enabled, now that all IPsec processing is done - * insert UDP encapsulation header after IP header. - */ - if (sav->natt_type) { -#ifdef _IP_VHL - const int hlen = IP_VHL_HL(ip->ip_vhl); -#else - const int hlen = (ip->ip_hl << 2); -#endif - int size, off; - struct mbuf *mi; - struct udphdr *udp; - - size = sizeof(struct udphdr); - if (sav->natt_type == UDP_ENCAP_ESPINUDP_NON_IKE) { - /* - * draft-ietf-ipsec-nat-t-ike-0[01].txt and - * draft-ietf-ipsec-udp-encaps-(00/)01.txt, - * ignoring possible AH mode - * non-IKE marker + non-ESP marker - * from draft-ietf-ipsec-udp-encaps-00.txt. - */ - size += sizeof(u_int64_t); - } - mi = m_makespace(m, hlen, size, &off); - if (mi == NULL) { - DPRINTF(("%s: m_makespace for udphdr failed\n", - __func__)); - error = ENOBUFS; - goto bad; - } - - udp = (struct udphdr *)(mtod(mi, caddr_t) + off); - if (sav->natt_type == UDP_ENCAP_ESPINUDP_NON_IKE) - udp->uh_sport = htons(UDP_ENCAP_ESPINUDP_PORT); - else - udp->uh_sport = - KEY_PORTFROMSADDR(&sav->sah->saidx.src); - udp->uh_dport = KEY_PORTFROMSADDR(&sav->sah->saidx.dst); - udp->uh_sum = 0; - udp->uh_ulen = htons(m->m_pkthdr.len - hlen); - ip->ip_len = m->m_pkthdr.len; - ip->ip_p = IPPROTO_UDP; - - if (sav->natt_type == UDP_ENCAP_ESPINUDP_NON_IKE) - *(u_int64_t *)(udp + 1) = 0; - } -#endif /* IPSEC_NAT_T */ - - return ip_output(m, NULL, NULL, IP_RAWOUTPUT, NULL, NULL); -#endif /* INET */ + i = ip->ip_hl << 2; + off = offsetof(struct ip, ip_p); + break; #ifdef INET6 case AF_INET6: - /* - * We don't need massage, IPv6 header fields are always in - * net endian. 
- */ - return ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL); + i = sizeof(struct ip6_hdr); + off = offsetof(struct ip6_hdr, ip6_nxt); + break; #endif /* INET6 */ + default: + DPRINTF(("%s: unsupported protocol family %u\n", + __func__, dst->sa.sa_family)); + error = EPFNOSUPPORT; + IPSEC_OSTAT_INC(sav->sah->saidx.proto, nopf); + goto bad; } - panic("ipsec_process_done"); + error = (*sav->tdb_xform->xf_output)(m, sp, sav, idx, i, off); + if (error != 0) { + key_freesav(&sav); + key_freesp(&sp); + } + return (error); bad: - m_freem(m); + IPSECSTAT_INC(ips_out_inval); + if (m != NULL) + m_freem(m); + if (sav != NULL) + key_freesav(&sav); + key_freesp(&sp); return (error); } -static struct ipsecrequest * -ipsec_nextisr( - struct mbuf *m, - struct ipsecrequest *isr, - int af, - struct secasindex *saidx, - int *error -) +int +ipsec4_process_packet(struct mbuf *m, struct secpolicy *sp, + struct inpcb *inp) { -#define IPSEC_OSTAT(name) do { \ - if (isr->saidx.proto == IPPROTO_ESP) \ - ESPSTAT_INC(esps_##name); \ - else if (isr->saidx.proto == IPPROTO_AH)\ - AHSTAT_INC(ahs_##name); \ - else \ - IPCOMPSTAT_INC(ipcomps_##name); \ -} while (0) - struct secasvar *sav; - IPSECREQUEST_LOCK_ASSERT(isr); + return (ipsec4_perform_request(m, sp, 0)); +} - IPSEC_ASSERT(af == AF_INET || af == AF_INET6, - ("invalid address family %u", af)); -again: - /* - * Craft SA index to search for proper SA. Note that - * we only fillin unspecified SA peers for transport - * mode; for tunnel mode they must already be filled in. 
- */ - *saidx = isr->saidx; - if (isr->saidx.mode == IPSEC_MODE_TRANSPORT) { - /* Fillin unspecified SA peers only for transport mode */ - if (af == AF_INET) { - struct sockaddr_in *sin; - struct ip *ip = mtod(m, struct ip *); +static int +ipsec4_common_output(struct mbuf *m, struct inpcb *inp, int forwarding) +{ + struct secpolicy *sp; + int error; - if (saidx->src.sa.sa_len == 0) { - sin = &saidx->src.sin; - sin->sin_len = sizeof(*sin); - sin->sin_family = AF_INET; - sin->sin_port = IPSEC_PORT_ANY; - sin->sin_addr = ip->ip_src; - } - if (saidx->dst.sa.sa_len == 0) { - sin = &saidx->dst.sin; - sin->sin_len = sizeof(*sin); - sin->sin_family = AF_INET; - sin->sin_port = IPSEC_PORT_ANY; - sin->sin_addr = ip->ip_dst; - } - } else { - struct sockaddr_in6 *sin6; - struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); - - if (saidx->src.sin6.sin6_len == 0) { - sin6 = (struct sockaddr_in6 *)&saidx->src; - sin6->sin6_len = sizeof(*sin6); - sin6->sin6_family = AF_INET6; - sin6->sin6_port = IPSEC_PORT_ANY; - sin6->sin6_addr = ip6->ip6_src; - if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) { - /* fix scope id for comparing SPD */ - sin6->sin6_addr.s6_addr16[1] = 0; - sin6->sin6_scope_id = - ntohs(ip6->ip6_src.s6_addr16[1]); - } - } - if (saidx->dst.sin6.sin6_len == 0) { - sin6 = (struct sockaddr_in6 *)&saidx->dst; - sin6->sin6_len = sizeof(*sin6); - sin6->sin6_family = AF_INET6; - sin6->sin6_port = IPSEC_PORT_ANY; - sin6->sin6_addr = ip6->ip6_dst; - if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst)) { - /* fix scope id for comparing SPD */ - sin6->sin6_addr.s6_addr16[1] = 0; - sin6->sin6_scope_id = - ntohs(ip6->ip6_dst.s6_addr16[1]); - } - } + /* Lookup for the corresponding outbound security policy */ + sp = ipsec4_checkpolicy(m, inp, &error); + if (sp == NULL) { + if (error == -EINVAL) { + /* Discarded by policy. */ + m_freem(m); + return (EACCES); } + return (0); /* No IPsec required. */ } /* - * Lookup SA and validate it. 
+ * Usually we have to have tunnel mode IPsec security policy + * when we are forwarding a packet. Otherwise we could not handle + * encrypted replies, because they are not destined for us. But + * some users are doing source address translation for forwarded + * packets, and thus, even if they are forwarded, the replies will + * return back to us. */ - *error = key_checkrequest(isr, saidx); - if (*error != 0) { + if (!forwarding) { /* - * IPsec processing is required, but no SA found. - * I assume that key_acquire() had been called - * to get/establish the SA. Here I discard - * this packet because it is responsibility for - * upper layer to retransmit the packet. + * Do delayed checksums now because we send before + * this is done in the normal processing path. */ - IPSECSTAT_INC(ips_out_nosa); - goto bad; + if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { + in_delayed_cksum(m); + m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; + } +#ifdef SCTP + if (m->m_pkthdr.csum_flags & CSUM_SCTP) { + struct ip *ip = mtod(m, struct ip *); + + sctp_delayed_cksum(m, (uint32_t)(ip->ip_hl << 2)); + m->m_pkthdr.csum_flags &= ~CSUM_SCTP; + } +#endif } - sav = isr->sav; - if (sav == NULL) { - IPSEC_ASSERT(ipsec_get_reqlevel(isr) == IPSEC_LEVEL_USE, - ("no SA found, but required; level %u", - ipsec_get_reqlevel(isr))); - IPSECREQUEST_UNLOCK(isr); - isr = isr->next; + /* NB: callee frees mbuf and releases reference to SP */ + error = ipsec4_process_packet(m, sp, inp); + if (error == EJUSTRETURN) { /* - * If isr is NULL, we found a 'use' policy w/o SA. - * Return w/o error and w/o isr so we can drop out - * and continue w/o IPsec processing. + * We had a SP with a level of 'use' and no SA. We + * will just continue to process the packet without + * IPsec processing and return without error. 
*/ - if (isr == NULL) - return isr; - IPSECREQUEST_LOCK(isr); - goto again; + return (0); } + if (error == 0) + return (EINPROGRESS); /* consumed by IPsec */ + return (error); +} + +/* + * IPSEC_OUTPUT() method implementation for IPv4. + * 0 - no IPsec handling needed + * other values - mbuf consumed by IPsec. + */ +int +ipsec4_output(struct mbuf *m, struct inpcb *inp) +{ + + /* + * If the packet is resubmitted to ip_output (e.g. after + * AH, ESP, etc. processing), there will be a tag to bypass + * the lookup and related policy checking. + */ + if (m_tag_find(m, PACKET_TAG_IPSEC_OUT_DONE, NULL) != NULL) + return (0); + + return (ipsec4_common_output(m, inp, 0)); +} + +/* + * IPSEC_FORWARD() method implementation for IPv4. + * 0 - no IPsec handling needed + * other values - mbuf consumed by IPsec. + */ +int +ipsec4_forward(struct mbuf *m) +{ + + /* + * Check if this packet has an active inbound SP and needs to be + * dropped instead of forwarded. + */ + if (ipsec4_in_reject(m, NULL) != 0) { + m_freem(m); + return (EACCES); + } + return (ipsec4_common_output(m, NULL, 1)); +} +#endif + +#ifdef INET6 +static int +in6_sa_equal_addrwithscope(const struct sockaddr_in6 *sa, + const struct in6_addr *ia) +{ + struct in6_addr ia2; + + if (IN6_IS_SCOPE_LINKLOCAL(&sa->sin6_addr)) { + memcpy(&ia2, &sa->sin6_addr, sizeof(ia2)); + ia2.s6_addr16[1] = htons(sa->sin6_scope_id); + return (IN6_ARE_ADDR_EQUAL(ia, &ia2)); + } + return (IN6_ARE_ADDR_EQUAL(&sa->sin6_addr, ia)); +} + +static struct secasvar * +ipsec6_allocsa(struct mbuf *m, struct secpolicy *sp, u_int *pidx, int *error) +{ + struct secasindex *saidx, tmpsaidx; + struct ipsecrequest *isr; + struct sockaddr_in6 *sin6; + struct secasvar *sav; + struct ip6_hdr *ip6; /* * Check system global policy controls. 
*/ +next: + isr = sp->req[*pidx]; if ((isr->saidx.proto == IPPROTO_ESP && !V_esp_enable) || (isr->saidx.proto == IPPROTO_AH && !V_ah_enable) || (isr->saidx.proto == IPPROTO_IPCOMP && !V_ipcomp_enable)) { DPRINTF(("%s: IPsec outbound packet dropped due" " to policy (check your sysctls)\n", __func__)); - IPSEC_OSTAT(pdrops); + IPSEC_OSTAT_INC(isr->saidx.proto, pdrops); *error = EHOSTUNREACH; - goto bad; + return (NULL); } - /* - * Sanity check the SA contents for the caller - * before they invoke the xform output method. + * Craft SA index to search for proper SA. Note that + * we only fillin unspecified SA peers for transport + * mode; for tunnel mode they must already be filled in. */ - if (sav->tdb_xform == NULL) { - DPRINTF(("%s: no transform for SA\n", __func__)); - IPSEC_OSTAT(noxform); - *error = EHOSTUNREACH; - goto bad; + if (isr->saidx.mode == IPSEC_MODE_TRANSPORT) { + saidx = &tmpsaidx; + *saidx = isr->saidx; + ip6 = mtod(m, struct ip6_hdr *); + if (saidx->src.sin6.sin6_len == 0) { + sin6 = (struct sockaddr_in6 *)&saidx->src; + sin6->sin6_len = sizeof(*sin6); + sin6->sin6_family = AF_INET6; + sin6->sin6_port = IPSEC_PORT_ANY; + sin6->sin6_addr = ip6->ip6_src; + if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) { + /* fix scope id for comparing SPD */ + sin6->sin6_addr.s6_addr16[1] = 0; + sin6->sin6_scope_id = + ntohs(ip6->ip6_src.s6_addr16[1]); + } + } + if (saidx->dst.sin6.sin6_len == 0) { + sin6 = (struct sockaddr_in6 *)&saidx->dst; + sin6->sin6_len = sizeof(*sin6); + sin6->sin6_family = AF_INET6; + sin6->sin6_port = IPSEC_PORT_ANY; + sin6->sin6_addr = ip6->ip6_dst; + if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst)) { + /* fix scope id for comparing SPD */ + sin6->sin6_addr.s6_addr16[1] = 0; + sin6->sin6_scope_id = + ntohs(ip6->ip6_dst.s6_addr16[1]); + } + } + } else + saidx = &sp->req[*pidx]->saidx; + /* + * Lookup SA and validate it. 
+ */ + sav = key_allocsa_policy(sp, saidx, error); + if (sav == NULL) { + IPSEC6STAT_INC(ips_out_nosa); + if (*error != 0) + return (NULL); + if (ipsec_get_reqlevel(sp, *pidx) != IPSEC_LEVEL_REQUIRE) { + /* + * We have no SA and policy that doesn't require + * this IPsec transform, thus we can continue w/o + * IPsec processing, i.e. return EJUSTRETURN. + * But first check if there is some bundled transform. + */ + if (sp->tcount > ++(*pidx)) + goto next; + *error = EJUSTRETURN; + } + return (NULL); } - return isr; -bad: - IPSEC_ASSERT(*error != 0, ("error return w/ no error code")); - IPSECREQUEST_UNLOCK(isr); - return NULL; -#undef IPSEC_OSTAT + IPSEC_ASSERT(sav->tdb_xform != NULL, ("SA with NULL tdb_xform")); + return (sav); } -#ifdef INET /* - * IPsec output logic for IPv4. + * IPsec output logic for IPv6. */ -int -ipsec4_process_packet( - struct mbuf *m, - struct ipsecrequest *isr, - int flags, - int tunalready) +static int +ipsec6_perform_request(struct mbuf *m, struct secpolicy *sp, u_int idx) { - struct secasindex saidx; + char sbuf[IPSEC_ADDRSTRLEN], dbuf[IPSEC_ADDRSTRLEN]; + struct ipsec_ctx_data ctx; + union sockaddr_union *dst; struct secasvar *sav; - struct ip *ip; + struct ip6_hdr *ip6; int error, i, off; - IPSEC_ASSERT(m != NULL, ("null mbuf")); - IPSEC_ASSERT(isr != NULL, ("null isr")); - - IPSECREQUEST_LOCK(isr); /* insure SA contents don't change */ + IPSEC_ASSERT(idx < sp->tcount, ("Wrong IPsec request index %d", idx)); - isr = ipsec_nextisr(m, isr, AF_INET, &saidx, &error); - if (isr == NULL) { - if (error != 0) - goto bad; - return EJUSTRETURN; + sav = ipsec6_allocsa(m, sp, &idx, &error); + if (sav == NULL) { + if (error == EJUSTRETURN) { /* No IPsec required */ + key_freesp(&sp); + return (error); + } + goto bad; } - sav = isr->sav; - -#ifdef DEV_ENC - encif->if_opackets++; - encif->if_obytes += m->m_pkthdr.len; + /* Fix IP length in case if it is not set yet. 
*/ + ip6 = mtod(m, struct ip6_hdr *); + ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(*ip6)); - /* pass the mbuf to enc0 for bpf processing */ - ipsec_bpf(m, sav, AF_INET, ENC_OUT|ENC_BEFORE); - /* pass the mbuf to enc0 for packet filtering */ - if ((error = ipsec_filter(&m, PFIL_OUT, ENC_OUT|ENC_BEFORE)) != 0) + IPSEC_INIT_CTX(&ctx, &m, sav, AF_INET6, IPSEC_ENC_BEFORE); + if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_OUT)) != 0) goto bad; -#endif - if (!tunalready) { - union sockaddr_union *dst = &sav->sah->saidx.dst; - int setdf; + ip6 = mtod(m, struct ip6_hdr *); /* pfil can change mbuf */ + dst = &sav->sah->saidx.dst; - /* - * Collect IP_DF state from the outer header. - */ - if (dst->sa.sa_family == AF_INET) { - if (m->m_len < sizeof (struct ip) && - (m = m_pullup(m, sizeof (struct ip))) == NULL) { - error = ENOBUFS; - goto bad; - } - ip = mtod(m, struct ip *); - /* Honor system-wide control of how to handle IP_DF */ - switch (V_ip4_ipsec_dfbit) { - case 0: /* clear in outer header */ - case 1: /* set in outer header */ - setdf = V_ip4_ipsec_dfbit; - break; - default: /* propagate to outer header */ - setdf = ntohs(ip->ip_off & IP_DF); - break; - } - } else { - ip = NULL; /* keep compiler happy */ - setdf = 0; + /* Do the appropriate encapsulation, if necessary */ + if (sp->req[idx]->saidx.mode == IPSEC_MODE_TUNNEL || /* Tunnel requ'd */ + dst->sa.sa_family != AF_INET6 || /* PF mismatch */ + ((dst->sa.sa_family == AF_INET6) && + (!IN6_IS_ADDR_UNSPECIFIED(&dst->sin6.sin6_addr)) && + (!in6_sa_equal_addrwithscope(&dst->sin6, &ip6->ip6_dst)))) { + if (m->m_pkthdr.len - sizeof(*ip6) > IPV6_MAXPACKET) { + /* No jumbogram support. 
*/ + error = ENXIO; /*XXX*/ + goto bad; } - /* Do the appropriate encapsulation, if necessary */ - if (isr->saidx.mode == IPSEC_MODE_TUNNEL || /* Tunnel requ'd */ - dst->sa.sa_family != AF_INET || /* PF mismatch */ -#if 0 - (sav->flags & SADB_X_SAFLAGS_TUNNEL) || /* Tunnel requ'd */ - sav->tdb_xform->xf_type == XF_IP4 || /* ditto */ -#endif - (dst->sa.sa_family == AF_INET && /* Proxy */ - dst->sin.sin_addr.s_addr != INADDR_ANY && - dst->sin.sin_addr.s_addr != ip->ip_dst.s_addr)) { - struct mbuf *mp; - - /* Fix IPv4 header checksum and length */ - if (m->m_len < sizeof (struct ip) && - (m = m_pullup(m, sizeof (struct ip))) == NULL) { - error = ENOBUFS; - goto bad; - } - ip = mtod(m, struct ip *); - ip->ip_len = htons(m->m_pkthdr.len); - ip->ip_sum = 0; -#ifdef _IP_VHL - if (ip->ip_vhl == IP_VHL_BORING) - ip->ip_sum = in_cksum_hdr(ip); - else - ip->ip_sum = in_cksum(m, - _IP_VHL_HL(ip->ip_vhl) << 2); -#else - ip->ip_sum = in_cksum(m, ip->ip_hl << 2); -#endif - - /* Encapsulate the packet */ - error = ipip_output(m, isr, &mp, 0, 0); - if (mp == NULL && !error) { - /* Should never happen. */ - DPRINTF(("%s: ipip_output returns no mbuf and " - "no error!", __func__)); - error = EFAULT; - } - if (error) { - if (mp) { - /* XXX: Should never happen! */ - m_freem(mp); - } - m = NULL; /* ipip_output() already freed it */ - goto bad; - } - m = mp, mp = NULL; - /* - * ipip_output clears IP_DF in the new header. If - * we need to propagate IP_DF from the outer header, - * then we have to do it here. - * - * XXX shouldn't assume what ipip_output does. 
- */ - if (dst->sa.sa_family == AF_INET && setdf) { - if (m->m_len < sizeof (struct ip) && - (m = m_pullup(m, sizeof (struct ip))) == NULL) { - error = ENOBUFS; - goto bad; - } - ip = mtod(m, struct ip *); - ip->ip_off = ntohs(ip->ip_off); - ip->ip_off |= IP_DF; - ip->ip_off = htons(ip->ip_off); - } + error = ipsec_encap(&m, &sav->sah->saidx); + if (error != 0) { + DPRINTF(("%s: encapsulation for SA %s->%s " + "SPI 0x%08x failed with error %d\n", __func__, + ipsec_address(&sav->sah->saidx.src, sbuf, + sizeof(sbuf)), + ipsec_address(&sav->sah->saidx.dst, dbuf, + sizeof(dbuf)), ntohl(sav->spi), error)); + /* XXXAE: IPSEC_OSTAT_INC(tunnel); */ + goto bad; } } -#ifdef DEV_ENC - /* pass the mbuf to enc0 for bpf processing */ - ipsec_bpf(m, sav, AF_INET, ENC_OUT|ENC_AFTER); - /* pass the mbuf to enc0 for packet filtering */ - if ((error = ipsec_filter(&m, PFIL_OUT, ENC_OUT|ENC_AFTER)) != 0) + IPSEC_INIT_CTX(&ctx, &m, sav, dst->sa.sa_family, IPSEC_ENC_AFTER); + if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_OUT)) != 0) goto bad; -#endif - /* - * Dispatch to the appropriate IPsec transform logic. The - * packet will be returned for transmission after crypto - * processing, etc. are completed. For encapsulation we - * bypass this call because of the explicit call done above - * (necessary to deal with IP_DF handling for IPv4). - * - * NB: m & sav are ``passed to caller'' who's reponsible for - * for reclaiming their resources. 
- */ - if (sav->tdb_xform->xf_type != XF_IP4) { + switch(dst->sa.sa_family) { +#ifdef INET + case AF_INET: + { + struct ip *ip; ip = mtod(m, struct ip *); i = ip->ip_hl << 2; off = offsetof(struct ip, ip_p); - error = (*sav->tdb_xform->xf_output)(m, isr, NULL, i, off); - } else { - error = ipsec_process_done(m, isr); + } + break; +#endif /* AF_INET */ + case AF_INET6: + i = sizeof(struct ip6_hdr); + off = offsetof(struct ip6_hdr, ip6_nxt); + break; + default: + DPRINTF(("%s: unsupported protocol family %u\n", + __func__, dst->sa.sa_family)); + error = EPFNOSUPPORT; + IPSEC_OSTAT_INC(sav->sah->saidx.proto, nopf); + goto bad; } - IPSECREQUEST_UNLOCK(isr); - return error; + error = (*sav->tdb_xform->xf_output)(m, sp, sav, idx, i, off); + if (error != 0) { + key_freesav(&sav); + key_freesp(&sp); + } + return (error); bad: - if (isr) - IPSECREQUEST_UNLOCK(isr); - if (m) + IPSEC6STAT_INC(ips_out_inval); + if (m != NULL) m_freem(m); - return error; + if (sav != NULL) + key_freesav(&sav); + key_freesp(&sp); + return (error); } -#endif -#ifdef INET6 -/* - * Chop IP6 header from the payload. - */ -static struct mbuf * -ipsec6_splithdr(struct mbuf *m) +int +ipsec6_process_packet(struct mbuf *m, struct secpolicy *sp, + struct inpcb *inp) { - struct mbuf *mh; - struct ip6_hdr *ip6; - int hlen; - IPSEC_ASSERT(m->m_len >= sizeof (struct ip6_hdr), - ("first mbuf too short, len %u", m->m_len)); - ip6 = mtod(m, struct ip6_hdr *); - hlen = sizeof(struct ip6_hdr); - if (m->m_len > hlen) { - MGETHDR(mh, M_DONTWAIT, MT_DATA); - if (!mh) { + return (ipsec6_perform_request(m, sp, 0)); +} + +static int +ipsec6_common_output(struct mbuf *m, struct inpcb *inp, int forwarding) +{ + struct secpolicy *sp; + int error; + + /* Lookup for the corresponding outbound security policy */ + sp = ipsec6_checkpolicy(m, inp, &error); + if (sp == NULL) { + if (error == -EINVAL) { + /* Discarded by policy. */ m_freem(m); - return NULL; + return (EACCES); + } + return (0); /* No IPsec required. 
*/ + } + + if (!forwarding) { + /* + * Do delayed checksums now because we send before + * this is done in the normal processing path. + */ + if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { + in6_delayed_cksum(m, m->m_pkthdr.len - + sizeof(struct ip6_hdr), sizeof(struct ip6_hdr)); + m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6; } - M_MOVE_PKTHDR(mh, m); - MH_ALIGN(mh, hlen); - m->m_len -= hlen; - m->m_data += hlen; - mh->m_next = m; - m = mh; - m->m_len = hlen; - bcopy((caddr_t)ip6, mtod(m, caddr_t), hlen); - } else if (m->m_len < hlen) { - m = m_pullup(m, hlen); - if (!m) - return NULL; +#ifdef SCTP + if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) { + sctp_delayed_cksum(m, sizeof(struct ip6_hdr)); + m->m_pkthdr.csum_flags &= ~CSUM_SCTP_IPV6; + } +#endif + } + /* NB: callee frees mbuf and releases reference to SP */ + error = ipsec6_process_packet(m, sp, inp); + if (error == EJUSTRETURN) { + /* + * We had a SP with a level of 'use' and no SA. We + * will just continue to process the packet without + * IPsec processing and return without error. + */ + return (0); } - return m; + if (error == 0) + return (EINPROGRESS); /* consumed by IPsec */ + return (error); } /* - * IPsec output logic for IPv6, transport mode. + * IPSEC_OUTPUT() method implementation for IPv6. + * 0 - no IPsec handling needed + * other values - mbuf consumed by IPsec. 
*/ int -ipsec6_output_trans( - struct ipsec_output_state *state, - u_char *nexthdrp, - struct mbuf *mprev, - struct secpolicy *sp, - int flags, - int *tun) +ipsec6_output(struct mbuf *m, struct inpcb *inp) { - struct ipsecrequest *isr; - struct secasindex saidx; - int error = 0; - struct mbuf *m; - - IPSEC_ASSERT(state != NULL, ("null state")); - IPSEC_ASSERT(state->m != NULL, ("null m")); - IPSEC_ASSERT(nexthdrp != NULL, ("null nexthdrp")); - IPSEC_ASSERT(mprev != NULL, ("null mprev")); - IPSEC_ASSERT(sp != NULL, ("null sp")); - IPSEC_ASSERT(tun != NULL, ("null tun")); - - KEYDEBUG(KEYDEBUG_IPSEC_DATA, - printf("%s: applied SP\n", __func__); - kdebug_secpolicy(sp)); - - isr = sp->req; - if (isr->saidx.mode == IPSEC_MODE_TUNNEL) { - /* the rest will be handled by ipsec6_output_tunnel() */ - *tun = 1; /* need tunnel-mode processing */ - return 0; - } - *tun = 0; - m = state->m; - - IPSECREQUEST_LOCK(isr); /* insure SA contents don't change */ - isr = ipsec_nextisr(m, isr, AF_INET6, &saidx, &error); - if (isr == NULL) { - if (error != 0) { -#ifdef notdef - /* XXX should notification be done for all errors ? */ - /* - * Notify the fact that the packet is discarded - * to ourselves. I believe this is better than - * just silently discarding. (jinmei@kame.net) - * XXX: should we restrict the error to TCP packets? - * XXX: should we directly notify sockets via - * pfctlinputs? - */ - icmp6_error(m, ICMP6_DST_UNREACH, - ICMP6_DST_UNREACH_ADMIN, 0); - m = NULL; /* NB: icmp6_error frees mbuf */ -#endif - goto bad; - } - return EJUSTRETURN; - } + /* + * If the packet is resubmitted to ip_output (e.g. after + * AH, ESP, etc. processing), there will be a tag to bypass + * the lookup and related policy checking. 
+ */ + if (m_tag_find(m, PACKET_TAG_IPSEC_OUT_DONE, NULL) != NULL) + return (0); - error = (*isr->sav->tdb_xform->xf_output)(m, isr, NULL, - sizeof (struct ip6_hdr), - offsetof(struct ip6_hdr, - ip6_nxt)); - IPSECREQUEST_UNLOCK(isr); - return error; -bad: - if (isr) - IPSECREQUEST_UNLOCK(isr); - if (m) - m_freem(m); - state->m = NULL; - return error; + return (ipsec6_common_output(m, inp, 0)); } -static int -ipsec6_encapsulate(struct mbuf *m, struct secasvar *sav) +/* + * IPSEC_FORWARD() method implementation for IPv6. + * 0 - no IPsec handling needed + * other values - mbuf consumed by IPsec. + */ +int +ipsec6_forward(struct mbuf *m) { - struct ip6_hdr *oip6; - struct ip6_hdr *ip6; - size_t plen; - - /* can't tunnel between different AFs */ - if (sav->sah->saidx.src.sa.sa_family != AF_INET6 || - sav->sah->saidx.dst.sa.sa_family != AF_INET6) { - m_freem(m); - return EINVAL; - } - IPSEC_ASSERT(m->m_len == sizeof (struct ip6_hdr), - ("mbuf wrong size; len %u", m->m_len)); - /* - * grow the mbuf to accomodate the new IPv6 header. + * Check if this packet has an active inbound SP and needs to be + * dropped instead of forwarded. 
*/ - plen = m->m_pkthdr.len; - if (M_LEADINGSPACE(m->m_next) < sizeof(struct ip6_hdr)) { - struct mbuf *n; - MGET(n, M_DONTWAIT, MT_DATA); - if (!n) { - m_freem(m); - return ENOBUFS; - } - n->m_len = sizeof(struct ip6_hdr); - n->m_next = m->m_next; - m->m_next = n; - m->m_pkthdr.len += sizeof(struct ip6_hdr); - oip6 = mtod(n, struct ip6_hdr *); - } else { - m->m_next->m_len += sizeof(struct ip6_hdr); - m->m_next->m_data -= sizeof(struct ip6_hdr); - m->m_pkthdr.len += sizeof(struct ip6_hdr); - oip6 = mtod(m->m_next, struct ip6_hdr *); - } - ip6 = mtod(m, struct ip6_hdr *); - bcopy((caddr_t)ip6, (caddr_t)oip6, sizeof(struct ip6_hdr)); - - /* Fake link-local scope-class addresses */ - if (IN6_IS_SCOPE_LINKLOCAL(&oip6->ip6_src)) - oip6->ip6_src.s6_addr16[1] = 0; - if (IN6_IS_SCOPE_LINKLOCAL(&oip6->ip6_dst)) - oip6->ip6_dst.s6_addr16[1] = 0; - - /* construct new IPv6 header. see RFC 2401 5.1.2.2 */ - /* ECN consideration. */ - ip6_ecn_ingress(V_ip6_ipsec_ecn, &ip6->ip6_flow, &oip6->ip6_flow); - if (plen < IPV6_MAXPACKET - sizeof(struct ip6_hdr)) - ip6->ip6_plen = htons(plen); - else { - /* ip6->ip6_plen will be updated in ip6_output() */ + if (ipsec6_in_reject(m, NULL) != 0) { + m_freem(m); + return (EACCES); } - ip6->ip6_nxt = IPPROTO_IPV6; - ip6->ip6_src = sav->sah->saidx.src.sin6.sin6_addr; - ip6->ip6_dst = sav->sah->saidx.dst.sin6.sin6_addr; - ip6->ip6_hlim = IPV6_DEFHLIM; - - /* XXX Should ip6_src be updated later ? */ - - return 0; + return (ipsec6_common_output(m, NULL, 1)); } +#endif /* INET6 */ -/* - * IPsec output logic for IPv6, tunnel mode. 
- */ int -ipsec6_output_tunnel(struct ipsec_output_state *state, struct secpolicy *sp, int flags) +ipsec_process_done(struct mbuf *m, struct secpolicy *sp, struct secasvar *sav, + u_int idx) { - struct ip6_hdr *ip6; - struct ipsecrequest *isr; - struct secasindex saidx; + struct xform_history *xh; + struct secasindex *saidx; + struct m_tag *mtag; int error; - struct sockaddr_in6 *dst6; - struct mbuf *m; - IPSEC_ASSERT(state != NULL, ("null state")); - IPSEC_ASSERT(state->m != NULL, ("null m")); - IPSEC_ASSERT(sp != NULL, ("null sp")); - - KEYDEBUG(KEYDEBUG_IPSEC_DATA, - printf("%s: applied SP\n", __func__); - kdebug_secpolicy(sp)); + saidx = &sav->sah->saidx; + switch (saidx->dst.sa.sa_family) { +#ifdef INET + case AF_INET: + /* Fix the header length, for AH processing. */ + mtod(m, struct ip *)->ip_len = htons(m->m_pkthdr.len); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + /* Fix the header length, for AH processing. */ + if (m->m_pkthdr.len < sizeof (struct ip6_hdr)) { + error = ENXIO; + goto bad; + } + if (m->m_pkthdr.len - sizeof (struct ip6_hdr) > IPV6_MAXPACKET) { + /* No jumbogram support. */ + error = ENXIO; /*?*/ + goto bad; + } + mtod(m, struct ip6_hdr *)->ip6_plen = + htons(m->m_pkthdr.len - sizeof(struct ip6_hdr)); + break; +#endif /* INET6 */ + default: + DPRINTF(("%s: unknown protocol family %u\n", __func__, + saidx->dst.sa.sa_family)); + error = ENXIO; + goto bad; + } - m = state->m; /* - * transport mode ipsec (before the 1st tunnel mode) is already - * processed by ipsec6_output_trans(). + * Add a record of what we've done to the packet. 
*/ - for (isr = sp->req; isr; isr = isr->next) { - if (isr->saidx.mode == IPSEC_MODE_TUNNEL) - break; - } - - IPSECREQUEST_LOCK(isr); /* insure SA contents don't change */ - isr = ipsec_nextisr(m, isr, AF_INET6, &saidx, &error); - if (isr == NULL) { - if (error != 0) - goto bad; - return EJUSTRETURN; + mtag = m_tag_get(PACKET_TAG_IPSEC_OUT_DONE, sizeof(*xh), M_NOWAIT); + if (mtag == NULL) { + DPRINTF(("%s: could not get packet tag\n", __func__)); + error = ENOMEM; + goto bad; } -#ifdef DEV_ENC - encif->if_opackets++; - encif->if_obytes += m->m_pkthdr.len; + xh = (struct xform_history *)(mtag + 1); + xh->dst = saidx->dst; + xh->proto = saidx->proto; + xh->mode = saidx->mode; + xh->spi = sav->spi; + m_tag_prepend(m, mtag); - /* pass the mbuf to enc0 for bpf processing */ - ipsec_bpf(m, isr->sav, AF_INET6, ENC_OUT|ENC_BEFORE); - /* pass the mbuf to enc0 for packet filtering */ - if ((error = ipsec_filter(&m, PFIL_OUT, ENC_OUT|ENC_BEFORE)) != 0) - goto bad; -#endif + key_sa_recordxfer(sav, m); /* record data transfer */ /* - * There may be the case that SA status will be changed when - * we are refering to one. So calling splsoftnet(). + * If there's another (bundled) SA to apply, do so. + * Note that this puts a burden on the kernel stack size. + * If this is a problem we'll need to introduce a queue + * to set the packet on so we can unwind the stack before + * doing further processing. */ - if (isr->saidx.mode == IPSEC_MODE_TUNNEL) { - /* - * build IPsec tunnel. 
- */ - /* XXX should be processed with other familiy */ - if (isr->sav->sah->saidx.src.sa.sa_family != AF_INET6) { - ipseclog((LOG_ERR, "%s: family mismatched between " - "inner and outer, spi=%u\n", __func__, - ntohl(isr->sav->spi))); - IPSEC6STAT_INC(ips_out_inval); - error = EAFNOSUPPORT; - goto bad; - } - - m = ipsec6_splithdr(m); - if (!m) { - IPSEC6STAT_INC(ips_out_nomem); - error = ENOMEM; - goto bad; - } - error = ipsec6_encapsulate(m, isr->sav); - if (error) { - m = NULL; + if (++idx < sp->tcount) { + switch (saidx->dst.sa.sa_family) { +#ifdef INET + case AF_INET: + key_freesav(&sav); + IPSECSTAT_INC(ips_out_bundlesa); + return (ipsec4_perform_request(m, sp, idx)); + /* NOTREACHED */ +#endif +#ifdef INET6 + case AF_INET6: + key_freesav(&sav); + IPSEC6STAT_INC(ips_out_bundlesa); + return (ipsec6_perform_request(m, sp, idx)); + /* NOTREACHED */ +#endif /* INET6 */ + default: + DPRINTF(("%s: unknown protocol family %u\n", __func__, + saidx->dst.sa.sa_family)); + error = EPFNOSUPPORT; goto bad; } - ip6 = mtod(m, struct ip6_hdr *); + } - state->ro = - (struct route *)&isr->sav->sah->route_cache.sin6_route; - state->dst = (struct sockaddr *)&state->ro->ro_dst; - dst6 = (struct sockaddr_in6 *)state->dst; - if (state->ro->ro_rt - && ((state->ro->ro_rt->rt_flags & RTF_UP) == 0 - || !IN6_ARE_ADDR_EQUAL(&dst6->sin6_addr, &ip6->ip6_dst))) { - RTFREE(state->ro->ro_rt); - state->ro->ro_rt = NULL; - } - if (state->ro->ro_rt == NULL) { - bzero(dst6, sizeof(*dst6)); - dst6->sin6_family = AF_INET6; - dst6->sin6_len = sizeof(*dst6); - dst6->sin6_addr = ip6->ip6_dst; - rtalloc_ign_fib(state->ro, 0UL, M_GETFIB(m)); - } - if (state->ro->ro_rt == NULL) { - IP6STAT_INC(ip6s_noroute); - IPSEC6STAT_INC(ips_out_noroute); - error = EHOSTUNREACH; + key_freesp(&sp), sp = NULL; /* Release reference to SP */ +#ifdef INET + /* + * Do UDP encapsulation if SA requires it. 
+ */ + if (sav->natt != NULL) { + error = udp_ipsec_output(m, sav); + if (error != 0) goto bad; - } - - /* adjust state->dst if tunnel endpoint is offlink */ - if (state->ro->ro_rt->rt_flags & RTF_GATEWAY) - state->dst = (struct sockaddr *)state->ro->ro_rt->rt_gateway; } +#endif /* INET */ + /* + * We're done with IPsec processing, transmit the packet using the + * appropriate network protocol (IP or IPv6). + */ + switch (saidx->dst.sa.sa_family) { +#ifdef INET + case AF_INET: + key_freesav(&sav); + return ip_output(m, NULL, NULL, IP_RAWOUTPUT, NULL, NULL); +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + key_freesav(&sav); + return ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL); +#endif /* INET6 */ + } + panic("ipsec_process_done"); +bad: + m_freem(m); + key_freesav(&sav); + if (sp != NULL) + key_freesp(&sp); + return (error); +} - m = ipsec6_splithdr(m); - if (!m) { - IPSEC6STAT_INC(ips_out_nomem); - error = ENOMEM; - goto bad; +/* + * ipsec_prepend() is optimized version of M_PREPEND(). + * ipsec_encap() is called by IPsec output routine for tunnel mode SA. + * It is expected that after IP encapsulation some IPsec transform will + * be performed. Each IPsec transform inserts its variable length header + * just after outer IP header using m_makespace(). If given mbuf has not + * enough free space at the beginning, we allocate new mbuf and reserve + * some space at the beginning and at the end. + * This helps avoid allocating of new mbuf and data copying in m_makespace(), + * we place outer header in the middle of mbuf's data with reserved leading + * and trailing space: + * [ LEADINGSPACE ][ Outer IP header ][ TRAILINGSPACE ] + * LEADINGSPACE will be used to add ethernet header, TRAILINGSPACE will + * be used to inject AH/ESP/IPCOMP header. 
+ */ +#define IPSEC_TRAILINGSPACE (sizeof(struct udphdr) +/* NAT-T */ \ + max(sizeof(struct newesp) + EALG_MAX_BLOCK_LEN, /* ESP + IV */ \ + sizeof(struct newah) + HASH_MAX_LEN /* AH + ICV */)) +static struct mbuf * +ipsec_prepend(struct mbuf *m, int len, int how) +{ + struct mbuf *n; + + M_ASSERTPKTHDR(m); + IPSEC_ASSERT(len < MHLEN, ("wrong length")); + if (M_LEADINGSPACE(m) >= len) { + /* No need to allocate new mbuf. */ + m->m_data -= len; + m->m_len += len; + m->m_pkthdr.len += len; + return (m); } - ip6 = mtod(m, struct ip6_hdr *); + n = m_gethdr(how, m->m_type); + if (n == NULL) { + m_freem(m); + return (NULL); + } + m_move_pkthdr(n, m); + n->m_next = m; + if (len + IPSEC_TRAILINGSPACE < M_SIZE(n)) + m_align(n, len + IPSEC_TRAILINGSPACE); + n->m_len = len; + n->m_pkthdr.len += len; + return (n); +} -#ifdef DEV_ENC - /* pass the mbuf to enc0 for bpf processing */ - ipsec_bpf(m, isr->sav, AF_INET6, ENC_OUT|ENC_AFTER); - /* pass the mbuf to enc0 for packet filtering */ - if ((error = ipsec_filter(&m, PFIL_OUT, ENC_OUT|ENC_AFTER)) != 0) - goto bad; +static int +ipsec_encap(struct mbuf **mp, struct secasindex *saidx) +{ +#ifdef INET6 + struct ip6_hdr *ip6; #endif + struct ip *ip; + int setdf; + uint8_t itos, proto; - error = (*isr->sav->tdb_xform->xf_output)(m, isr, NULL, - sizeof (struct ip6_hdr), - offsetof(struct ip6_hdr, ip6_nxt)); - IPSECREQUEST_UNLOCK(isr); - return error; -bad: - if (isr) - IPSECREQUEST_UNLOCK(isr); - if (m) - m_freem(m); - state->m = NULL; - return error; + ip = mtod(*mp, struct ip *); + switch (ip->ip_v) { +#ifdef INET + case IPVERSION: + proto = IPPROTO_IPIP; + /* + * Collect IP_DF state from the inner header + * and honor system-wide control of how to handle it. 
+ */ + switch (V_ip4_ipsec_dfbit) { + case 0: /* clear in outer header */ + case 1: /* set in outer header */ + setdf = V_ip4_ipsec_dfbit; + break; + default:/* propagate to outer header */ + setdf = (ip->ip_off & htons(IP_DF)) != 0; + } + itos = ip->ip_tos; + break; +#endif +#ifdef INET6 + case (IPV6_VERSION >> 4): + proto = IPPROTO_IPV6; + ip6 = mtod(*mp, struct ip6_hdr *); + itos = (ntohl(ip6->ip6_flow) >> 20) & 0xff; + setdf = V_ip4_ipsec_dfbit ? 1: 0; + /* scoped address handling */ + in6_clearscope(&ip6->ip6_src); + in6_clearscope(&ip6->ip6_dst); + break; +#endif + default: + return (EAFNOSUPPORT); + } + switch (saidx->dst.sa.sa_family) { +#ifdef INET + case AF_INET: + if (saidx->src.sa.sa_family != AF_INET || + saidx->src.sin.sin_addr.s_addr == INADDR_ANY || + saidx->dst.sin.sin_addr.s_addr == INADDR_ANY) + return (EINVAL); + *mp = ipsec_prepend(*mp, sizeof(struct ip), M_NOWAIT); + if (*mp == NULL) + return (ENOBUFS); + ip = mtod(*mp, struct ip *); + ip->ip_v = IPVERSION; + ip->ip_hl = sizeof(struct ip) >> 2; + ip->ip_p = proto; + ip->ip_len = htons((*mp)->m_pkthdr.len); + ip->ip_ttl = V_ip_defttl; + ip->ip_sum = 0; + ip->ip_off = setdf ? 
htons(IP_DF): 0; + ip->ip_src = saidx->src.sin.sin_addr; + ip->ip_dst = saidx->dst.sin.sin_addr; + ip_ecn_ingress(V_ip4_ipsec_ecn, &ip->ip_tos, &itos); + ip_fillid(ip); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + if (saidx->src.sa.sa_family != AF_INET6 || + IN6_IS_ADDR_UNSPECIFIED(&saidx->src.sin6.sin6_addr) || + IN6_IS_ADDR_UNSPECIFIED(&saidx->dst.sin6.sin6_addr)) + return (EINVAL); + *mp = ipsec_prepend(*mp, sizeof(struct ip6_hdr), M_NOWAIT); + if (*mp == NULL) + return (ENOBUFS); + ip6 = mtod(*mp, struct ip6_hdr *); + ip6->ip6_flow = 0; + ip6->ip6_vfc = IPV6_VERSION; + ip6->ip6_hlim = V_ip6_defhlim; + ip6->ip6_nxt = proto; + ip6->ip6_dst = saidx->dst.sin6.sin6_addr; + /* For link-local address embed scope zone id */ + if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst)) + ip6->ip6_dst.s6_addr16[1] = + htons(saidx->dst.sin6.sin6_scope_id & 0xffff); + ip6->ip6_src = saidx->src.sin6.sin6_addr; + if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) + ip6->ip6_src.s6_addr16[1] = + htons(saidx->src.sin6.sin6_scope_id & 0xffff); + ip6->ip6_plen = htons((*mp)->m_pkthdr.len - sizeof(*ip6)); + ip_ecn_ingress(V_ip6_ipsec_ecn, &proto, &itos); + ip6->ip6_flow |= htonl((uint32_t)proto << 20); + break; +#endif /* INET6 */ + default: + return (EAFNOSUPPORT); + } + (*mp)->m_flags &= ~(M_BCAST | M_MCAST); + return (0); } -#endif /*INET6*/ + diff --git a/freebsd/sys/netipsec/ipsec_pcb.c b/freebsd/sys/netipsec/ipsec_pcb.c new file mode 100644 index 00000000..21a8182d --- /dev/null +++ b/freebsd/sys/netipsec/ipsec_pcb.c @@ -0,0 +1,481 @@ +#include + +/*- + * Copyright (c) 2016 Andrey V. Elsukov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include + +MALLOC_DEFINE(M_IPSEC_INPCB, "inpcbpolicy", "inpcb-resident ipsec policy"); + +static void +ipsec_setsockaddrs_inpcb(struct inpcb *inp, union sockaddr_union *src, + union sockaddr_union *dst, u_int dir) +{ + +#ifdef INET6 + if (inp->inp_vflag & INP_IPV6) { + struct sockaddr_in6 *sin6; + + bzero(&src->sin6, sizeof(src->sin6)); + bzero(&dst->sin6, sizeof(dst->sin6)); + src->sin6.sin6_family = AF_INET6; + src->sin6.sin6_len = sizeof(struct sockaddr_in6); + dst->sin6.sin6_family = AF_INET6; + dst->sin6.sin6_len = sizeof(struct sockaddr_in6); + + if (dir == IPSEC_DIR_OUTBOUND) + sin6 = &src->sin6; + else + sin6 = &dst->sin6; + sin6->sin6_addr = inp->in6p_laddr; + sin6->sin6_port = inp->inp_lport; + if (IN6_IS_SCOPE_LINKLOCAL(&inp->in6p_laddr)) { + /* XXXAE: use in6p_zoneid 
*/ + sin6->sin6_addr.s6_addr16[1] = 0; + sin6->sin6_scope_id = ntohs( + inp->in6p_laddr.s6_addr16[1]); + } + + if (dir == IPSEC_DIR_OUTBOUND) + sin6 = &dst->sin6; + else + sin6 = &src->sin6; + sin6->sin6_addr = inp->in6p_faddr; + sin6->sin6_port = inp->inp_fport; + if (IN6_IS_SCOPE_LINKLOCAL(&inp->in6p_faddr)) { + /* XXXAE: use in6p_zoneid */ + sin6->sin6_addr.s6_addr16[1] = 0; + sin6->sin6_scope_id = ntohs( + inp->in6p_faddr.s6_addr16[1]); + } + } +#endif +#ifdef INET + if (inp->inp_vflag & INP_IPV4) { + struct sockaddr_in *sin; + + bzero(&src->sin, sizeof(src->sin)); + bzero(&dst->sin, sizeof(dst->sin)); + src->sin.sin_family = AF_INET; + src->sin.sin_len = sizeof(struct sockaddr_in); + dst->sin.sin_family = AF_INET; + dst->sin.sin_len = sizeof(struct sockaddr_in); + + if (dir == IPSEC_DIR_OUTBOUND) + sin = &src->sin; + else + sin = &dst->sin; + sin->sin_addr = inp->inp_laddr; + sin->sin_port = inp->inp_lport; + + if (dir == IPSEC_DIR_OUTBOUND) + sin = &dst->sin; + else + sin = &src->sin; + sin->sin_addr = inp->inp_faddr; + sin->sin_port = inp->inp_fport; + } +#endif +} + +void +ipsec_setspidx_inpcb(struct inpcb *inp, struct secpolicyindex *spidx, + u_int dir) +{ + + ipsec_setsockaddrs_inpcb(inp, &spidx->src, &spidx->dst, dir); +#ifdef INET6 + if (inp->inp_vflag & INP_IPV6) { + spidx->prefs = sizeof(struct in6_addr) << 3; + spidx->prefd = sizeof(struct in6_addr) << 3; + } +#endif +#ifdef INET + if (inp->inp_vflag & INP_IPV4) { + spidx->prefs = sizeof(struct in_addr) << 3; + spidx->prefd = sizeof(struct in_addr) << 3; + } +#endif + spidx->ul_proto = IPPROTO_TCP; /* XXX: currently only TCP uses this */ + spidx->dir = dir; + KEYDBG(IPSEC_DUMP, + printf("%s: ", __func__); kdebug_secpolicyindex(spidx, NULL)); +} + +/* Initialize PCB policy. 
*/ +int +ipsec_init_pcbpolicy(struct inpcb *inp) +{ + + IPSEC_ASSERT(inp != NULL, ("null inp")); + IPSEC_ASSERT(inp->inp_sp == NULL, ("inp_sp already initialized")); + + inp->inp_sp = malloc(sizeof(struct inpcbpolicy), M_IPSEC_INPCB, + M_NOWAIT | M_ZERO); + if (inp->inp_sp == NULL) + return (ENOBUFS); + return (0); +} + +/* Delete PCB policy. */ +int +ipsec_delete_pcbpolicy(struct inpcb *inp) +{ + + if (inp->inp_sp == NULL) + return (0); + + if (inp->inp_sp->flags & INP_INBOUND_POLICY) + key_freesp(&inp->inp_sp->sp_in); + + if (inp->inp_sp->flags & INP_OUTBOUND_POLICY) + key_freesp(&inp->inp_sp->sp_out); + + free(inp->inp_sp, M_IPSEC_INPCB); + inp->inp_sp = NULL; + return (0); +} + +/* Deep-copy a policy in PCB. */ +static struct secpolicy * +ipsec_deepcopy_pcbpolicy(struct secpolicy *src) +{ + struct secpolicy *dst; + int i; + + if (src == NULL) + return (NULL); + + IPSEC_ASSERT(src->state == IPSEC_SPSTATE_PCB, ("SP isn't PCB")); + + dst = key_newsp(); + if (dst == NULL) + return (NULL); + + /* spidx is not copied here */ + dst->policy = src->policy; + dst->state = src->state; + dst->priority = src->priority; + /* Do not touch the refcnt field. */ + + /* Copy IPsec request chain. */ + for (i = 0; i < src->tcount; i++) { + dst->req[i] = ipsec_newisr(); + if (dst->req[i] == NULL) { + key_freesp(&dst); + return (NULL); + } + bcopy(src->req[i], dst->req[i], sizeof(struct ipsecrequest)); + dst->tcount++; + } + KEYDBG(IPSEC_DUMP, + printf("%s: copied SP(%p) -> SP(%p)\n", __func__, src, dst); + kdebug_secpolicy(dst)); + return (dst); +} + +/* + * Copy IPsec policy from old INPCB into new. + * It is expected that new INPCB has not configured policies. + */ +int +ipsec_copy_pcbpolicy(struct inpcb *old, struct inpcb *new) +{ + struct secpolicy *sp; + + /* + * old->inp_sp can be NULL if PCB was created when an IPsec + * support was unavailable. This is not an error, we don't have + * policies in this PCB, so nothing to copy. 
+ */ + if (old->inp_sp == NULL) + return (0); + + IPSEC_ASSERT(new->inp_sp != NULL, ("new inp_sp is NULL")); + IPSEC_ASSERT((new->inp_sp->flags & ( + INP_INBOUND_POLICY | INP_OUTBOUND_POLICY)) == 0, + ("new PCB already has configured policies")); + INP_WLOCK_ASSERT(new); + INP_LOCK_ASSERT(old); + + if (old->inp_sp->flags & INP_INBOUND_POLICY) { + sp = ipsec_deepcopy_pcbpolicy(old->inp_sp->sp_in); + if (sp == NULL) + return (ENOBUFS); + ipsec_setspidx_inpcb(new, &sp->spidx, IPSEC_DIR_INBOUND); + new->inp_sp->sp_in = sp; + new->inp_sp->flags |= INP_INBOUND_POLICY; + } + if (old->inp_sp->flags & INP_OUTBOUND_POLICY) { + sp = ipsec_deepcopy_pcbpolicy(old->inp_sp->sp_out); + if (sp == NULL) + return (ENOBUFS); + ipsec_setspidx_inpcb(new, &sp->spidx, IPSEC_DIR_OUTBOUND); + new->inp_sp->sp_out = sp; + new->inp_sp->flags |= INP_OUTBOUND_POLICY; + } + return (0); +} + +static int +ipsec_set_pcbpolicy(struct inpcb *inp, struct ucred *cred, + void *request, size_t len) +{ + struct sadb_x_policy *xpl; + struct secpolicy **spp, *newsp; + int error, flags; + + xpl = (struct sadb_x_policy *)request; + /* Select direction. */ + switch (xpl->sadb_x_policy_dir) { + case IPSEC_DIR_INBOUND: + case IPSEC_DIR_OUTBOUND: + break; + default: + ipseclog((LOG_ERR, "%s: invalid direction=%u\n", __func__, + xpl->sadb_x_policy_dir)); + return (EINVAL); + } + /* + * Privileged sockets are allowed to set own security policy + * and configure IPsec bypass. Unprivileged sockets only can + * have ENTRUST policy. + */ + switch (xpl->sadb_x_policy_type) { + case IPSEC_POLICY_IPSEC: + case IPSEC_POLICY_BYPASS: + if (cred != NULL && + priv_check_cred(cred, PRIV_NETINET_IPSEC, 0) != 0) + return (EACCES); + /* Allocate new SP entry. 
*/ + newsp = key_msg2sp(xpl, len, &error); + if (newsp == NULL) + return (error); + newsp->state = IPSEC_SPSTATE_PCB; + newsp->spidx.ul_proto = IPSEC_ULPROTO_ANY; +#ifdef INET + if (inp->inp_vflag & INP_IPV4) { + newsp->spidx.src.sin.sin_family = + newsp->spidx.dst.sin.sin_family = AF_INET; + newsp->spidx.src.sin.sin_len = + newsp->spidx.dst.sin.sin_len = + sizeof(struct sockaddr_in); + } +#endif +#ifdef INET6 + if (inp->inp_vflag & INP_IPV6) { + newsp->spidx.src.sin6.sin6_family = + newsp->spidx.dst.sin6.sin6_family = AF_INET6; + newsp->spidx.src.sin6.sin6_len = + newsp->spidx.dst.sin6.sin6_len = + sizeof(struct sockaddr_in6); + } +#endif + break; + case IPSEC_POLICY_ENTRUST: + /* We just use NULL pointer for ENTRUST policy */ + newsp = NULL; + break; + default: + /* Other security policy types aren't allowed for PCB */ + return (EINVAL); + } + + INP_WLOCK(inp); + if (xpl->sadb_x_policy_dir == IPSEC_DIR_INBOUND) { + spp = &inp->inp_sp->sp_in; + flags = INP_INBOUND_POLICY; + } else { + spp = &inp->inp_sp->sp_out; + flags = INP_OUTBOUND_POLICY; + } + /* Clear old SP and set new SP. */ + if (*spp != NULL) + key_freesp(spp); + *spp = newsp; + KEYDBG(IPSEC_DUMP, + printf("%s: new SP(%p)\n", __func__, newsp)); + if (newsp == NULL) + inp->inp_sp->flags &= ~flags; + else { + inp->inp_sp->flags |= flags; + KEYDBG(IPSEC_DUMP, kdebug_secpolicy(newsp)); + } + INP_WUNLOCK(inp); + return (0); +} + +static int +ipsec_get_pcbpolicy(struct inpcb *inp, void *request, size_t *len) +{ + struct sadb_x_policy *xpl; + struct secpolicy *sp; + int error, flags; + + xpl = (struct sadb_x_policy *)request; + + INP_RLOCK(inp); + flags = inp->inp_sp->flags; + /* Select direction. 
*/ + switch (xpl->sadb_x_policy_dir) { + case IPSEC_DIR_INBOUND: + sp = inp->inp_sp->sp_in; + flags &= INP_INBOUND_POLICY; + break; + case IPSEC_DIR_OUTBOUND: + sp = inp->inp_sp->sp_out; + flags &= INP_OUTBOUND_POLICY; + break; + default: + INP_RUNLOCK(inp); + ipseclog((LOG_ERR, "%s: invalid direction=%u\n", __func__, + xpl->sadb_x_policy_dir)); + return (EINVAL); + } + + if (flags == 0) { + /* Return ENTRUST policy */ + INP_RUNLOCK(inp); + xpl->sadb_x_policy_exttype = SADB_X_EXT_POLICY; + xpl->sadb_x_policy_type = IPSEC_POLICY_ENTRUST; + xpl->sadb_x_policy_id = 0; + xpl->sadb_x_policy_priority = 0; + xpl->sadb_x_policy_len = PFKEY_UNIT64(sizeof(*xpl)); + *len = sizeof(*xpl); + return (0); + } + + IPSEC_ASSERT(sp != NULL, + ("sp is NULL, but flags is 0x%04x", inp->inp_sp->flags)); + + key_addref(sp); + INP_RUNLOCK(inp); + error = key_sp2msg(sp, request, len); + key_freesp(&sp); + if (error == EINVAL) + return (error); + /* + * We return "success", but user should check *len. + * *len will be set to size of valid data and + * sadb_x_policy_len will contain needed size. + */ + return (0); +} + +/* Handle socket option control request for PCB */ +static int +ipsec_control_pcbpolicy(struct inpcb *inp, struct sockopt *sopt) +{ + void *optdata; + size_t optlen; + int error; + + if (inp->inp_sp == NULL) + return (ENOPROTOOPT); + + /* Limit maximum request size to PAGE_SIZE */ + optlen = sopt->sopt_valsize; + if (optlen < sizeof(struct sadb_x_policy) || optlen > PAGE_SIZE) + return (EINVAL); + + optdata = malloc(optlen, M_TEMP, sopt->sopt_td ? M_WAITOK: M_NOWAIT); + if (optdata == NULL) + return (ENOBUFS); + /* + * We need a hint from the user, what policy is requested - input + * or output? User should specify it in the buffer, even for + * setsockopt(). + */ + error = sooptcopyin(sopt, optdata, optlen, optlen); + if (error == 0) { + if (sopt->sopt_dir == SOPT_SET) + error = ipsec_set_pcbpolicy(inp, + sopt->sopt_td ? 
sopt->sopt_td->td_ucred: NULL, + optdata, optlen); + else { + error = ipsec_get_pcbpolicy(inp, optdata, &optlen); + if (error == 0) + error = sooptcopyout(sopt, optdata, optlen); + } + } + free(optdata, M_TEMP); + return (error); +} + +#ifdef INET +/* + * IPSEC_PCBCTL() method implementation for IPv4. + */ +int +ipsec4_pcbctl(struct inpcb *inp, struct sockopt *sopt) +{ + + if (sopt->sopt_name != IP_IPSEC_POLICY) + return (ENOPROTOOPT); + return (ipsec_control_pcbpolicy(inp, sopt)); +} +#endif + +#ifdef INET6 +/* + * IPSEC_PCBCTL() method implementation for IPv6. + */ +int +ipsec6_pcbctl(struct inpcb *inp, struct sockopt *sopt) +{ + + if (sopt->sopt_name != IPV6_IPSEC_POLICY) + return (ENOPROTOOPT); + return (ipsec_control_pcbpolicy(inp, sopt)); +} +#endif + diff --git a/freebsd/sys/netipsec/ipsec_support.h b/freebsd/sys/netipsec/ipsec_support.h new file mode 100644 index 00000000..b72aee20 --- /dev/null +++ b/freebsd/sys/netipsec/ipsec_support.h @@ -0,0 +1,190 @@ +/*- + * Copyright (c) 2016 Andrey V. Elsukov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _NETIPSEC_IPSEC_SUPPORT_H_ +#define _NETIPSEC_IPSEC_SUPPORT_H_ + +#ifdef _KERNEL +#if defined(IPSEC) || defined(IPSEC_SUPPORT) +struct mbuf; +struct inpcb; +struct tcphdr; +struct sockopt; +struct sockaddr; +struct ipsec_support; +struct tcpmd5_support; + +size_t ipsec_hdrsiz_inpcb(struct inpcb *); +int ipsec_init_pcbpolicy(struct inpcb *); +int ipsec_delete_pcbpolicy(struct inpcb *); +int ipsec_copy_pcbpolicy(struct inpcb *, struct inpcb *); + +struct ipsec_methods { + int (*input)(struct mbuf *, int, int); + int (*check_policy)(const struct mbuf *, struct inpcb *); + int (*forward)(struct mbuf *); + int (*output)(struct mbuf *, struct inpcb *); + int (*pcbctl)(struct inpcb *, struct sockopt *); + size_t (*hdrsize)(struct inpcb *); + int (*capability)(struct mbuf *, u_int); + int (*ctlinput)(int, struct sockaddr *, void *); + + int (*udp_input)(struct mbuf *, int, int); + int (*udp_pcbctl)(struct inpcb *, struct sockopt *); +}; +#define IPSEC_CAP_OPERABLE 1 +#define IPSEC_CAP_BYPASS_FILTER 2 + +struct tcpmd5_methods { + int (*input)(struct mbuf *, struct tcphdr *, u_char *); + int (*output)(struct mbuf *, struct tcphdr *, u_char *); + int (*pcbctl)(struct inpcb *, struct sockopt *); +}; + +#define IPSEC_MODULE_ENABLED 0x0001 +#define IPSEC_ENABLED(proto) \ + ((proto ## _ipsec_support)->enabled & IPSEC_MODULE_ENABLED) +#define TCPMD5_ENABLED() IPSEC_ENABLED(tcp) + +#ifdef TCP_SIGNATURE +/* TCP-MD5 build in the kernel */ 
+struct tcpmd5_support { + const u_int enabled; + const struct tcpmd5_methods * const methods; +}; +extern const struct tcpmd5_support * const tcp_ipsec_support; + +#define TCPMD5_INPUT(m, ...) \ + (*tcp_ipsec_support->methods->input)(m, __VA_ARGS__) +#define TCPMD5_OUTPUT(m, ...) \ + (*tcp_ipsec_support->methods->output)(m, __VA_ARGS__) +#define TCPMD5_PCBCTL(inp, sopt) \ + (*tcp_ipsec_support->methods->pcbctl)(inp, sopt) +#elif defined(IPSEC_SUPPORT) +/* TCP-MD5 build as module */ +struct tcpmd5_support { + volatile u_int enabled; + const struct tcpmd5_methods * volatile methods; +}; +extern struct tcpmd5_support * const tcp_ipsec_support; + +void tcpmd5_support_enable(const struct tcpmd5_methods * const); +void tcpmd5_support_disable(void); + +int tcpmd5_kmod_pcbctl(struct tcpmd5_support * const, struct inpcb *, + struct sockopt *); +int tcpmd5_kmod_input(struct tcpmd5_support * const, struct mbuf *, + struct tcphdr *, u_char *); +int tcpmd5_kmod_output(struct tcpmd5_support * const, struct mbuf *, + struct tcphdr *, u_char *); +#define TCPMD5_INPUT(m, ...) \ + tcpmd5_kmod_input(tcp_ipsec_support, m, __VA_ARGS__) +#define TCPMD5_OUTPUT(m, ...) \ + tcpmd5_kmod_output(tcp_ipsec_support, m, __VA_ARGS__) +#define TCPMD5_PCBCTL(inp, sopt) \ + tcpmd5_kmod_pcbctl(tcp_ipsec_support, inp, sopt) +#endif + +#endif /* IPSEC || IPSEC_SUPPORT */ + +#if defined(IPSEC) +struct ipsec_support { + const u_int enabled; + const struct ipsec_methods * const methods; +}; +extern const struct ipsec_support * const ipv4_ipsec_support; +extern const struct ipsec_support * const ipv6_ipsec_support; + +#define IPSEC_INPUT(proto, m, ...) \ + (*(proto ## _ipsec_support)->methods->input)(m, __VA_ARGS__) +#define IPSEC_CHECK_POLICY(proto, m, ...) \ + (*(proto ## _ipsec_support)->methods->check_policy)(m, __VA_ARGS__) +#define IPSEC_FORWARD(proto, m) \ + (*(proto ## _ipsec_support)->methods->forward)(m) +#define IPSEC_OUTPUT(proto, m, ...) 
\ + (*(proto ## _ipsec_support)->methods->output)(m, __VA_ARGS__) +#define IPSEC_PCBCTL(proto, inp, sopt) \ + (*(proto ## _ipsec_support)->methods->pcbctl)(inp, sopt) +#define IPSEC_CAPS(proto, m, ...) \ + (*(proto ## _ipsec_support)->methods->capability)(m, __VA_ARGS__) +#define IPSEC_HDRSIZE(proto, inp) \ + (*(proto ## _ipsec_support)->methods->hdrsize)(inp) + +#define UDPENCAP_INPUT(m, ...) \ + (*ipv4_ipsec_support->methods->udp_input)(m, __VA_ARGS__) +#define UDPENCAP_PCBCTL(inp, sopt) \ + (*ipv4_ipsec_support->methods->udp_pcbctl)(inp, sopt) + +#elif defined(IPSEC_SUPPORT) +struct ipsec_support { + volatile u_int enabled; + const struct ipsec_methods * volatile methods; +}; +extern struct ipsec_support * const ipv4_ipsec_support; +extern struct ipsec_support * const ipv6_ipsec_support; + +void ipsec_support_enable(struct ipsec_support * const, + const struct ipsec_methods * const); +void ipsec_support_disable(struct ipsec_support * const); + +int ipsec_kmod_input(struct ipsec_support * const, struct mbuf *, int, int); +int ipsec_kmod_check_policy(struct ipsec_support * const, struct mbuf *, + struct inpcb *); +int ipsec_kmod_forward(struct ipsec_support * const, struct mbuf *); +int ipsec_kmod_output(struct ipsec_support * const, struct mbuf *, + struct inpcb *); +int ipsec_kmod_pcbctl(struct ipsec_support * const, struct inpcb *, + struct sockopt *); +int ipsec_kmod_capability(struct ipsec_support * const, struct mbuf *, u_int); +size_t ipsec_kmod_hdrsize(struct ipsec_support * const, struct inpcb *); +int ipsec_kmod_udp_input(struct ipsec_support * const, struct mbuf *, int, int); +int ipsec_kmod_udp_pcbctl(struct ipsec_support * const, struct inpcb *, + struct sockopt *); + +#define UDPENCAP_INPUT(m, ...) \ + ipsec_kmod_udp_input(ipv4_ipsec_support, m, __VA_ARGS__) +#define UDPENCAP_PCBCTL(inp, sopt) \ + ipsec_kmod_udp_pcbctl(ipv4_ipsec_support, inp, sopt) + +#define IPSEC_INPUT(proto, ...) 
\ + ipsec_kmod_input(proto ## _ipsec_support, __VA_ARGS__) +#define IPSEC_CHECK_POLICY(proto, ...) \ + ipsec_kmod_check_policy(proto ## _ipsec_support, __VA_ARGS__) +#define IPSEC_FORWARD(proto, ...) \ + ipsec_kmod_forward(proto ## _ipsec_support, __VA_ARGS__) +#define IPSEC_OUTPUT(proto, ...) \ + ipsec_kmod_output(proto ## _ipsec_support, __VA_ARGS__) +#define IPSEC_PCBCTL(proto, ...) \ + ipsec_kmod_pcbctl(proto ## _ipsec_support, __VA_ARGS__) +#define IPSEC_CAPS(proto, ...) \ + ipsec_kmod_capability(proto ## _ipsec_support, __VA_ARGS__) +#define IPSEC_HDRSIZE(proto, ...) \ + ipsec_kmod_hdrsize(proto ## _ipsec_support, __VA_ARGS__) +#endif /* IPSEC_SUPPORT */ +#endif /* _KERNEL */ +#endif /* _NETIPSEC_IPSEC_SUPPORT_H_ */ diff --git a/freebsd/sys/netipsec/key.c b/freebsd/sys/netipsec/key.c index 6619c6db..5d3a612d 100644 --- a/freebsd/sys/netipsec/key.c +++ b/freebsd/sys/netipsec/key.c @@ -44,12 +44,14 @@ #include #include #include +#include #include #include #include #include #include #include +#include #include #include #include @@ -59,15 +61,18 @@ #include #include +#include + #include -#include -#include +#include #include +#include #include #include #include #include +#include #ifdef INET6 #include @@ -75,13 +80,6 @@ #include #endif /* INET6 */ -#if defined(INET) || defined(INET6) -#include -#endif -#ifdef INET6 -#include -#endif /* INET6 */ - #include #include #include @@ -94,7 +92,7 @@ #endif #include - +#include #include /* randomness */ @@ -142,28 +140,118 @@ static VNET_DEFINE(int, key_preferred_oldsa) = 1; static VNET_DEFINE(u_int32_t, acq_seq) = 0; #define V_acq_seq VNET(acq_seq) - /* SPD */ -static VNET_DEFINE(LIST_HEAD(_sptree, secpolicy), sptree[IPSEC_DIR_MAX]); +static VNET_DEFINE(uint32_t, sp_genid) = 0; +#define V_sp_genid VNET(sp_genid) + +/* SPD */ +TAILQ_HEAD(secpolicy_queue, secpolicy); +LIST_HEAD(secpolicy_list, secpolicy); +static VNET_DEFINE(struct secpolicy_queue, sptree[IPSEC_DIR_MAX]); +static VNET_DEFINE(struct secpolicy_queue, 
sptree_ifnet[IPSEC_DIR_MAX]); +static struct rmlock sptree_lock; #define V_sptree VNET(sptree) -static struct mtx sptree_lock; -#define SPTREE_LOCK_INIT() \ - mtx_init(&sptree_lock, "sptree", \ - "fast ipsec security policy database", MTX_DEF) -#define SPTREE_LOCK_DESTROY() mtx_destroy(&sptree_lock) -#define SPTREE_LOCK() mtx_lock(&sptree_lock) -#define SPTREE_UNLOCK() mtx_unlock(&sptree_lock) -#define SPTREE_LOCK_ASSERT() mtx_assert(&sptree_lock, MA_OWNED) - -static VNET_DEFINE(LIST_HEAD(_sahtree, secashead), sahtree); /* SAD */ +#define V_sptree_ifnet VNET(sptree_ifnet) +#define SPTREE_LOCK_INIT() rm_init(&sptree_lock, "sptree") +#define SPTREE_LOCK_DESTROY() rm_destroy(&sptree_lock) +#define SPTREE_RLOCK_TRACKER struct rm_priotracker sptree_tracker +#define SPTREE_RLOCK() rm_rlock(&sptree_lock, &sptree_tracker) +#define SPTREE_RUNLOCK() rm_runlock(&sptree_lock, &sptree_tracker) +#define SPTREE_RLOCK_ASSERT() rm_assert(&sptree_lock, RA_RLOCKED) +#define SPTREE_WLOCK() rm_wlock(&sptree_lock) +#define SPTREE_WUNLOCK() rm_wunlock(&sptree_lock) +#define SPTREE_WLOCK_ASSERT() rm_assert(&sptree_lock, RA_WLOCKED) +#define SPTREE_UNLOCK_ASSERT() rm_assert(&sptree_lock, RA_UNLOCKED) + +/* Hash table for lookup SP using unique id */ +static VNET_DEFINE(struct secpolicy_list *, sphashtbl); +static VNET_DEFINE(u_long, sphash_mask); +#define V_sphashtbl VNET(sphashtbl) +#define V_sphash_mask VNET(sphash_mask) + +#define SPHASH_NHASH_LOG2 7 +#define SPHASH_NHASH (1 << SPHASH_NHASH_LOG2) +#define SPHASH_HASHVAL(id) (key_u32hash(id) & V_sphash_mask) +#define SPHASH_HASH(id) &V_sphashtbl[SPHASH_HASHVAL(id)] + +/* SAD */ +TAILQ_HEAD(secashead_queue, secashead); +LIST_HEAD(secashead_list, secashead); +static VNET_DEFINE(struct secashead_queue, sahtree); +static struct rmlock sahtree_lock; #define V_sahtree VNET(sahtree) -static struct mtx sahtree_lock; -#define SAHTREE_LOCK_INIT() \ - mtx_init(&sahtree_lock, "sahtree", \ - "fast ipsec security association database", MTX_DEF) 
-#define SAHTREE_LOCK_DESTROY() mtx_destroy(&sahtree_lock) -#define SAHTREE_LOCK() mtx_lock(&sahtree_lock) -#define SAHTREE_UNLOCK() mtx_unlock(&sahtree_lock) -#define SAHTREE_LOCK_ASSERT() mtx_assert(&sahtree_lock, MA_OWNED) +#define SAHTREE_LOCK_INIT() rm_init(&sahtree_lock, "sahtree") +#define SAHTREE_LOCK_DESTROY() rm_destroy(&sahtree_lock) +#define SAHTREE_RLOCK_TRACKER struct rm_priotracker sahtree_tracker +#define SAHTREE_RLOCK() rm_rlock(&sahtree_lock, &sahtree_tracker) +#define SAHTREE_RUNLOCK() rm_runlock(&sahtree_lock, &sahtree_tracker) +#define SAHTREE_RLOCK_ASSERT() rm_assert(&sahtree_lock, RA_RLOCKED) +#define SAHTREE_WLOCK() rm_wlock(&sahtree_lock) +#define SAHTREE_WUNLOCK() rm_wunlock(&sahtree_lock) +#define SAHTREE_WLOCK_ASSERT() rm_assert(&sahtree_lock, RA_WLOCKED) +#define SAHTREE_UNLOCK_ASSERT() rm_assert(&sahtree_lock, RA_UNLOCKED) + +/* Hash table for lookup in SAD using SA addresses */ +static VNET_DEFINE(struct secashead_list *, sahaddrhashtbl); +static VNET_DEFINE(u_long, sahaddrhash_mask); +#define V_sahaddrhashtbl VNET(sahaddrhashtbl) +#define V_sahaddrhash_mask VNET(sahaddrhash_mask) + +#define SAHHASH_NHASH_LOG2 7 +#define SAHHASH_NHASH (1 << SAHHASH_NHASH_LOG2) +#define SAHADDRHASH_HASHVAL(saidx) \ + (key_saidxhash(saidx) & V_sahaddrhash_mask) +#define SAHADDRHASH_HASH(saidx) \ + &V_sahaddrhashtbl[SAHADDRHASH_HASHVAL(saidx)] + +/* Hash table for lookup in SAD using SPI */ +LIST_HEAD(secasvar_list, secasvar); +static VNET_DEFINE(struct secasvar_list *, savhashtbl); +static VNET_DEFINE(u_long, savhash_mask); +#define V_savhashtbl VNET(savhashtbl) +#define V_savhash_mask VNET(savhash_mask) +#define SAVHASH_NHASH_LOG2 7 +#define SAVHASH_NHASH (1 << SAVHASH_NHASH_LOG2) +#define SAVHASH_HASHVAL(spi) (key_u32hash(spi) & V_savhash_mask) +#define SAVHASH_HASH(spi) &V_savhashtbl[SAVHASH_HASHVAL(spi)] + +static uint32_t +key_saidxhash(const struct secasindex *saidx) +{ + uint32_t hval; + + hval = fnv_32_buf(&saidx->proto, sizeof(saidx->proto), + 
FNV1_32_INIT); + switch (saidx->dst.sa.sa_family) { +#ifdef INET + case AF_INET: + hval = fnv_32_buf(&saidx->src.sin.sin_addr, + sizeof(in_addr_t), hval); + hval = fnv_32_buf(&saidx->dst.sin.sin_addr, + sizeof(in_addr_t), hval); + break; +#endif +#ifdef INET6 + case AF_INET6: + hval = fnv_32_buf(&saidx->src.sin6.sin6_addr, + sizeof(struct in6_addr), hval); + hval = fnv_32_buf(&saidx->dst.sin6.sin6_addr, + sizeof(struct in6_addr), hval); + break; +#endif + default: + hval = 0; + ipseclog((LOG_DEBUG, "%s: unknown address family %d", + __func__, saidx->dst.sa.sa_family)); + } + return (hval); +} + +static uint32_t +key_u32hash(uint32_t val) +{ + + return (fnv_32_buf(&val, sizeof(val), FNV1_32_INIT)); +} /* registed list */ static VNET_DEFINE(LIST_HEAD(_regtree, secreg), regtree[SADB_SATYPE_MAX + 1]); @@ -176,16 +264,40 @@ static struct mtx regtree_lock; #define REGTREE_UNLOCK() mtx_unlock(&regtree_lock) #define REGTREE_LOCK_ASSERT() mtx_assert(&regtree_lock, MA_OWNED) -static VNET_DEFINE(LIST_HEAD(_acqtree, secacq), acqtree); /* acquiring list */ +/* Acquiring list */ +LIST_HEAD(secacq_list, secacq); +static VNET_DEFINE(struct secacq_list, acqtree); #define V_acqtree VNET(acqtree) static struct mtx acq_lock; #define ACQ_LOCK_INIT() \ - mtx_init(&acq_lock, "acqtree", "fast ipsec acquire list", MTX_DEF) + mtx_init(&acq_lock, "acqtree", "ipsec SA acquiring list", MTX_DEF) #define ACQ_LOCK_DESTROY() mtx_destroy(&acq_lock) #define ACQ_LOCK() mtx_lock(&acq_lock) #define ACQ_UNLOCK() mtx_unlock(&acq_lock) #define ACQ_LOCK_ASSERT() mtx_assert(&acq_lock, MA_OWNED) +/* Hash table for lookup in ACQ list using SA addresses */ +static VNET_DEFINE(struct secacq_list *, acqaddrhashtbl); +static VNET_DEFINE(u_long, acqaddrhash_mask); +#define V_acqaddrhashtbl VNET(acqaddrhashtbl) +#define V_acqaddrhash_mask VNET(acqaddrhash_mask) + +/* Hash table for lookup in ACQ list using SEQ number */ +static VNET_DEFINE(struct secacq_list *, acqseqhashtbl); +static VNET_DEFINE(u_long, 
acqseqhash_mask); +#define V_acqseqhashtbl VNET(acqseqhashtbl) +#define V_acqseqhash_mask VNET(acqseqhash_mask) + +#define ACQHASH_NHASH_LOG2 7 +#define ACQHASH_NHASH (1 << ACQHASH_NHASH_LOG2) +#define ACQADDRHASH_HASHVAL(saidx) \ + (key_saidxhash(saidx) & V_acqaddrhash_mask) +#define ACQSEQHASH_HASHVAL(seq) \ + (key_u32hash(seq) & V_acqseqhash_mask) +#define ACQADDRHASH_HASH(saidx) \ + &V_acqaddrhashtbl[ACQADDRHASH_HASHVAL(saidx)] +#define ACQSEQHASH_HASH(seq) \ + &V_acqseqhashtbl[ACQSEQHASH_HASHVAL(seq)] /* SP acquiring list */ static VNET_DEFINE(LIST_HEAD(_spacqtree, secspacq), spacqtree); #define V_spacqtree VNET(spacqtree) @@ -198,22 +310,6 @@ static struct mtx spacq_lock; #define SPACQ_UNLOCK() mtx_unlock(&spacq_lock) #define SPACQ_LOCK_ASSERT() mtx_assert(&spacq_lock, MA_OWNED) -/* search order for SAs */ -static const u_int saorder_state_valid_prefer_old[] = { - SADB_SASTATE_DYING, SADB_SASTATE_MATURE, -}; -static const u_int saorder_state_valid_prefer_new[] = { - SADB_SASTATE_MATURE, SADB_SASTATE_DYING, -}; -static const u_int saorder_state_alive[] = { - /* except DEAD */ - SADB_SASTATE_MATURE, SADB_SASTATE_DYING, SADB_SASTATE_LARVAL -}; -static const u_int saorder_state_any[] = { - SADB_SASTATE_MATURE, SADB_SASTATE_DYING, - SADB_SASTATE_LARVAL, SADB_SASTATE_DEAD -}; - static const int minsize[] = { sizeof(struct sadb_msg), /* SADB_EXT_RESERVED */ sizeof(struct sadb_sa), /* SADB_EXT_SA */ @@ -241,7 +337,12 @@ static const int minsize[] = { sizeof(struct sadb_address), /* SADB_X_EXT_NAT_T_OAI */ sizeof(struct sadb_address), /* SADB_X_EXT_NAT_T_OAR */ sizeof(struct sadb_x_nat_t_frag),/* SADB_X_EXT_NAT_T_FRAG */ + sizeof(struct sadb_x_sa_replay), /* SADB_X_EXT_SA_REPLAY */ + sizeof(struct sadb_address), /* SADB_X_EXT_NEW_ADDRESS_SRC */ + sizeof(struct sadb_address), /* SADB_X_EXT_NEW_ADDRESS_DST */ }; +_Static_assert(sizeof(minsize)/sizeof(int) == SADB_EXT_MAX + 1, "minsize size mismatch"); + static const int maxsize[] = { sizeof(struct sadb_msg), /* 
SADB_EXT_RESERVED */ sizeof(struct sadb_sa), /* SADB_EXT_SA */ @@ -269,7 +370,23 @@ static const int maxsize[] = { 0, /* SADB_X_EXT_NAT_T_OAI */ 0, /* SADB_X_EXT_NAT_T_OAR */ sizeof(struct sadb_x_nat_t_frag),/* SADB_X_EXT_NAT_T_FRAG */ + sizeof(struct sadb_x_sa_replay), /* SADB_X_EXT_SA_REPLAY */ + 0, /* SADB_X_EXT_NEW_ADDRESS_SRC */ + 0, /* SADB_X_EXT_NEW_ADDRESS_DST */ }; +_Static_assert(sizeof(maxsize)/sizeof(int) == SADB_EXT_MAX + 1, "maxsize size mismatch"); + +/* + * Internal values for SA flags: + * SADB_X_EXT_F_CLONED means that SA was cloned by key_updateaddresses, + * thus we will not free most of the SA content in key_delsav(). + */ +#define SADB_X_EXT_F_CLONED 0x80000000 + +#define SADB_CHECKLEN(_mhp, _ext) \ + ((_mhp)->extlen[(_ext)] < minsize[(_ext)] || (maxsize[(_ext)] != 0 && \ + ((_mhp)->extlen[(_ext)] > maxsize[(_ext)]))) +#define SADB_CHECKHDR(_mhp, _ext) ((_mhp)->ext[(_ext)] == NULL) static VNET_DEFINE(int, ipsec_esp_keymin) = 256; static VNET_DEFINE(int, ipsec_esp_auth) = 0; @@ -279,88 +396,75 @@ static VNET_DEFINE(int, ipsec_ah_keymin) = 128; #define V_ipsec_esp_auth VNET(ipsec_esp_auth) #define V_ipsec_ah_keymin VNET(ipsec_ah_keymin) -#ifdef SYSCTL_DECL -SYSCTL_DECL(_net_key); +#ifdef IPSEC_DEBUG +VNET_DEFINE(int, ipsec_debug) = 1; +#else +VNET_DEFINE(int, ipsec_debug) = 0; +#endif + +#ifdef INET +SYSCTL_DECL(_net_inet_ipsec); +SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DEBUG, debug, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ipsec_debug), 0, + "Enable IPsec debugging output when set."); +#endif +#ifdef INET6 +SYSCTL_DECL(_net_inet6_ipsec6); +SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_DEBUG, debug, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ipsec_debug), 0, + "Enable IPsec debugging output when set."); #endif -SYSCTL_VNET_INT(_net_key, KEYCTL_DEBUG_LEVEL, debug, - CTLFLAG_RW, &VNET_NAME(key_debug_level), 0, ""); +SYSCTL_DECL(_net_key); +SYSCTL_INT(_net_key, KEYCTL_DEBUG_LEVEL, debug, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(key_debug_level), 0, ""); /* max 
count of trial for the decision of spi value */ -SYSCTL_VNET_INT(_net_key, KEYCTL_SPI_TRY, spi_trycnt, - CTLFLAG_RW, &VNET_NAME(key_spi_trycnt), 0, ""); +SYSCTL_INT(_net_key, KEYCTL_SPI_TRY, spi_trycnt, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(key_spi_trycnt), 0, ""); /* minimum spi value to allocate automatically. */ -SYSCTL_VNET_INT(_net_key, KEYCTL_SPI_MIN_VALUE, - spi_minval, CTLFLAG_RW, &VNET_NAME(key_spi_minval), 0, ""); +SYSCTL_INT(_net_key, KEYCTL_SPI_MIN_VALUE, spi_minval, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(key_spi_minval), 0, ""); /* maximun spi value to allocate automatically. */ -SYSCTL_VNET_INT(_net_key, KEYCTL_SPI_MAX_VALUE, - spi_maxval, CTLFLAG_RW, &VNET_NAME(key_spi_maxval), 0, ""); +SYSCTL_INT(_net_key, KEYCTL_SPI_MAX_VALUE, spi_maxval, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(key_spi_maxval), 0, ""); /* interval to initialize randseed */ -SYSCTL_VNET_INT(_net_key, KEYCTL_RANDOM_INT, - int_random, CTLFLAG_RW, &VNET_NAME(key_int_random), 0, ""); +SYSCTL_INT(_net_key, KEYCTL_RANDOM_INT, int_random, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(key_int_random), 0, ""); /* lifetime for larval SA */ -SYSCTL_VNET_INT(_net_key, KEYCTL_LARVAL_LIFETIME, - larval_lifetime, CTLFLAG_RW, &VNET_NAME(key_larval_lifetime), 0, ""); +SYSCTL_INT(_net_key, KEYCTL_LARVAL_LIFETIME, larval_lifetime, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(key_larval_lifetime), 0, ""); /* counter for blocking to send SADB_ACQUIRE to IKEd */ -SYSCTL_VNET_INT(_net_key, KEYCTL_BLOCKACQ_COUNT, - blockacq_count, CTLFLAG_RW, &VNET_NAME(key_blockacq_count), 0, ""); +SYSCTL_INT(_net_key, KEYCTL_BLOCKACQ_COUNT, blockacq_count, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(key_blockacq_count), 0, ""); /* lifetime for blocking to send SADB_ACQUIRE to IKEd */ -SYSCTL_VNET_INT(_net_key, KEYCTL_BLOCKACQ_LIFETIME, - blockacq_lifetime, CTLFLAG_RW, &VNET_NAME(key_blockacq_lifetime), 0, ""); +SYSCTL_INT(_net_key, KEYCTL_BLOCKACQ_LIFETIME, blockacq_lifetime, + CTLFLAG_VNET | CTLFLAG_RW, 
&VNET_NAME(key_blockacq_lifetime), 0, ""); /* ESP auth */ -SYSCTL_VNET_INT(_net_key, KEYCTL_ESP_AUTH, esp_auth, - CTLFLAG_RW, &VNET_NAME(ipsec_esp_auth), 0, ""); +SYSCTL_INT(_net_key, KEYCTL_ESP_AUTH, esp_auth, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ipsec_esp_auth), 0, ""); /* minimum ESP key length */ -SYSCTL_VNET_INT(_net_key, KEYCTL_ESP_KEYMIN, - esp_keymin, CTLFLAG_RW, &VNET_NAME(ipsec_esp_keymin), 0, ""); +SYSCTL_INT(_net_key, KEYCTL_ESP_KEYMIN, esp_keymin, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ipsec_esp_keymin), 0, ""); /* minimum AH key length */ -SYSCTL_VNET_INT(_net_key, KEYCTL_AH_KEYMIN, ah_keymin, - CTLFLAG_RW, &VNET_NAME(ipsec_ah_keymin), 0, ""); +SYSCTL_INT(_net_key, KEYCTL_AH_KEYMIN, ah_keymin, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ipsec_ah_keymin), 0, ""); /* perfered old SA rather than new SA */ -SYSCTL_VNET_INT(_net_key, KEYCTL_PREFERED_OLDSA, - preferred_oldsa, CTLFLAG_RW, &VNET_NAME(key_preferred_oldsa), 0, ""); +SYSCTL_INT(_net_key, KEYCTL_PREFERED_OLDSA, preferred_oldsa, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(key_preferred_oldsa), 0, ""); #define __LIST_CHAINED(elm) \ (!((elm)->chain.le_next == NULL && (elm)->chain.le_prev == NULL)) -#define LIST_INSERT_TAIL(head, elm, type, field) \ -do {\ - struct type *curelm = LIST_FIRST(head); \ - if (curelm == NULL) {\ - LIST_INSERT_HEAD(head, elm, field); \ - } else { \ - while (LIST_NEXT(curelm, field)) \ - curelm = LIST_NEXT(curelm, field);\ - LIST_INSERT_AFTER(curelm, elm, field);\ - }\ -} while (0) - -#define KEY_CHKSASTATE(head, sav, name) \ -do { \ - if ((head) != (sav)) { \ - ipseclog((LOG_DEBUG, "%s: state mismatched (TREE=%d SA=%d)\n", \ - (name), (head), (sav))); \ - continue; \ - } \ -} while (0) - -#define KEY_CHKSPDIR(head, sp, name) \ -do { \ - if ((head) != (sp)) { \ - ipseclog((LOG_DEBUG, "%s: direction mismatched (TREE=%d SP=%d), " \ - "anyway continue.\n", \ - (name), (head), (sp))); \ - } \ -} while (0) MALLOC_DEFINE(M_IPSEC_SA, "secasvar", "ipsec security association"); 
MALLOC_DEFINE(M_IPSEC_SAH, "sahead", "ipsec sa head"); @@ -370,6 +474,17 @@ MALLOC_DEFINE(M_IPSEC_MISC, "ipsec-misc", "ipsec miscellaneous"); MALLOC_DEFINE(M_IPSEC_SAQ, "ipsec-saq", "ipsec sa acquire"); MALLOC_DEFINE(M_IPSEC_SAR, "ipsec-reg", "ipsec sa acquire"); +static VNET_DEFINE(uma_zone_t, key_lft_zone); +#define V_key_lft_zone VNET(key_lft_zone) + +static LIST_HEAD(xforms_list, xformsw) xforms = LIST_HEAD_INITIALIZER(); +static struct mtx xforms_lock; +#define XFORMS_LOCK_INIT() \ + mtx_init(&xforms_lock, "xforms_list", "IPsec transforms list", MTX_DEF) +#define XFORMS_LOCK_DESTROY() mtx_destroy(&xforms_lock) +#define XFORMS_LOCK() mtx_lock(&xforms_lock) +#define XFORMS_UNLOCK() mtx_unlock(&xforms_lock) + /* * set parameters into secpolicyindex buffer. * Must allocate secpolicyindex buffer passed to this function. @@ -397,6 +512,8 @@ do { \ (idx)->reqid = (r); \ bcopy((s), &(idx)->src, ((const struct sockaddr *)(s))->sa_len); \ bcopy((d), &(idx)->dst, ((const struct sockaddr *)(d))->sa_len); \ + key_porttosaddr(&(idx)->src.sa, 0); \ + key_porttosaddr(&(idx)->dst.sa, 0); \ } while (0) /* key statistics */ @@ -411,176 +528,206 @@ struct sadb_msghdr { int extlen[SADB_EXT_MAX + 1]; }; -static struct secasvar *key_allocsa_policy __P((const struct secasindex *)); -static void key_freesp_so __P((struct secpolicy **)); -static struct secasvar *key_do_allocsa_policy __P((struct secashead *, u_int)); -static void key_delsp __P((struct secpolicy *)); -static struct secpolicy *key_getsp __P((struct secpolicyindex *)); -static void _key_delsp(struct secpolicy *sp); -static struct secpolicy *key_getspbyid __P((u_int32_t)); -static u_int32_t key_newreqid __P((void)); -static struct mbuf *key_gather_mbuf __P((struct mbuf *, - const struct sadb_msghdr *, int, int, ...)); -static int key_spdadd __P((struct socket *, struct mbuf *, - const struct sadb_msghdr *)); -static u_int32_t key_getnewspid __P((void)); -static int key_spddelete __P((struct socket *, struct mbuf *, - const 
struct sadb_msghdr *)); -static int key_spddelete2 __P((struct socket *, struct mbuf *, - const struct sadb_msghdr *)); -static int key_spdget __P((struct socket *, struct mbuf *, - const struct sadb_msghdr *)); -static int key_spdflush __P((struct socket *, struct mbuf *, - const struct sadb_msghdr *)); -static int key_spddump __P((struct socket *, struct mbuf *, - const struct sadb_msghdr *)); -static struct mbuf *key_setdumpsp __P((struct secpolicy *, - u_int8_t, u_int32_t, u_int32_t)); -static u_int key_getspreqmsglen __P((struct secpolicy *)); -static int key_spdexpire __P((struct secpolicy *)); -static struct secashead *key_newsah __P((struct secasindex *)); -static void key_delsah __P((struct secashead *)); -static struct secasvar *key_newsav __P((struct mbuf *, - const struct sadb_msghdr *, struct secashead *, int *, - const char*, int)); -#define KEY_NEWSAV(m, sadb, sah, e) \ - key_newsav(m, sadb, sah, e, __FILE__, __LINE__) -static void key_delsav __P((struct secasvar *)); -static struct secashead *key_getsah __P((struct secasindex *)); -static struct secasvar *key_checkspidup __P((struct secasindex *, u_int32_t)); -static struct secasvar *key_getsavbyspi __P((struct secashead *, u_int32_t)); -static int key_setsaval __P((struct secasvar *, struct mbuf *, - const struct sadb_msghdr *)); -static int key_mature __P((struct secasvar *)); -static struct mbuf *key_setdumpsa __P((struct secasvar *, u_int8_t, - u_int8_t, u_int32_t, u_int32_t)); -static struct mbuf *key_setsadbmsg __P((u_int8_t, u_int16_t, u_int8_t, - u_int32_t, pid_t, u_int16_t)); -static struct mbuf *key_setsadbsa __P((struct secasvar *)); -static struct mbuf *key_setsadbaddr __P((u_int16_t, - const struct sockaddr *, u_int8_t, u_int16_t)); -#ifdef IPSEC_NAT_T +static struct supported_ealgs { + int sadb_alg; + const struct enc_xform *xform; +} supported_ealgs[] = { + { SADB_EALG_DESCBC, &enc_xform_des }, + { SADB_EALG_3DESCBC, &enc_xform_3des }, + { SADB_X_EALG_AES, &enc_xform_rijndael128 }, + 
{ SADB_X_EALG_BLOWFISHCBC, &enc_xform_blf }, + { SADB_X_EALG_CAST128CBC, &enc_xform_cast5 }, + { SADB_EALG_NULL, &enc_xform_null }, + { SADB_X_EALG_CAMELLIACBC, &enc_xform_camellia }, + { SADB_X_EALG_AESCTR, &enc_xform_aes_icm }, + { SADB_X_EALG_AESGCM16, &enc_xform_aes_nist_gcm }, + { SADB_X_EALG_AESGMAC, &enc_xform_aes_nist_gmac }, +}; + +static struct supported_aalgs { + int sadb_alg; + const struct auth_hash *xform; +} supported_aalgs[] = { + { SADB_X_AALG_NULL, &auth_hash_null }, + { SADB_AALG_MD5HMAC, &auth_hash_hmac_md5 }, + { SADB_AALG_SHA1HMAC, &auth_hash_hmac_sha1 }, + { SADB_X_AALG_RIPEMD160HMAC, &auth_hash_hmac_ripemd_160 }, + { SADB_X_AALG_MD5, &auth_hash_key_md5 }, + { SADB_X_AALG_SHA, &auth_hash_key_sha1 }, + { SADB_X_AALG_SHA2_256, &auth_hash_hmac_sha2_256 }, + { SADB_X_AALG_SHA2_384, &auth_hash_hmac_sha2_384 }, + { SADB_X_AALG_SHA2_512, &auth_hash_hmac_sha2_512 }, + { SADB_X_AALG_AES128GMAC, &auth_hash_nist_gmac_aes_128 }, + { SADB_X_AALG_AES192GMAC, &auth_hash_nist_gmac_aes_192 }, + { SADB_X_AALG_AES256GMAC, &auth_hash_nist_gmac_aes_256 }, +}; + +static struct supported_calgs { + int sadb_alg; + const struct comp_algo *xform; +} supported_calgs[] = { + { SADB_X_CALG_DEFLATE, &comp_algo_deflate }, +}; + +#ifndef IPSEC_DEBUG2 +static struct callout key_timer; +#endif + +static void key_unlink(struct secpolicy *); +static struct secpolicy *key_getsp(struct secpolicyindex *); +static struct secpolicy *key_getspbyid(u_int32_t); +static struct mbuf *key_gather_mbuf(struct mbuf *, + const struct sadb_msghdr *, int, int, ...); +static int key_spdadd(struct socket *, struct mbuf *, + const struct sadb_msghdr *); +static uint32_t key_getnewspid(void); +static int key_spddelete(struct socket *, struct mbuf *, + const struct sadb_msghdr *); +static int key_spddelete2(struct socket *, struct mbuf *, + const struct sadb_msghdr *); +static int key_spdget(struct socket *, struct mbuf *, + const struct sadb_msghdr *); +static int key_spdflush(struct socket *, 
struct mbuf *, + const struct sadb_msghdr *); +static int key_spddump(struct socket *, struct mbuf *, + const struct sadb_msghdr *); +static struct mbuf *key_setdumpsp(struct secpolicy *, + u_int8_t, u_int32_t, u_int32_t); +static struct mbuf *key_sp2mbuf(struct secpolicy *); +static size_t key_getspreqmsglen(struct secpolicy *); +static int key_spdexpire(struct secpolicy *); +static struct secashead *key_newsah(struct secasindex *); +static void key_freesah(struct secashead **); +static void key_delsah(struct secashead *); +static struct secasvar *key_newsav(const struct sadb_msghdr *, + struct secasindex *, uint32_t, int *); +static void key_delsav(struct secasvar *); +static void key_unlinksav(struct secasvar *); +static struct secashead *key_getsah(struct secasindex *); +static int key_checkspidup(uint32_t); +static struct secasvar *key_getsavbyspi(uint32_t); +static int key_setnatt(struct secasvar *, const struct sadb_msghdr *); +static int key_setsaval(struct secasvar *, const struct sadb_msghdr *); +static int key_updatelifetimes(struct secasvar *, const struct sadb_msghdr *); +static int key_updateaddresses(struct socket *, struct mbuf *, + const struct sadb_msghdr *, struct secasvar *, struct secasindex *); + +static struct mbuf *key_setdumpsa(struct secasvar *, u_int8_t, + u_int8_t, u_int32_t, u_int32_t); +static struct mbuf *key_setsadbmsg(u_int8_t, u_int16_t, u_int8_t, + u_int32_t, pid_t, u_int16_t); +static struct mbuf *key_setsadbsa(struct secasvar *); +static struct mbuf *key_setsadbaddr(u_int16_t, + const struct sockaddr *, u_int8_t, u_int16_t); static struct mbuf *key_setsadbxport(u_int16_t, u_int16_t); static struct mbuf *key_setsadbxtype(u_int16_t); -#endif -static void key_porttosaddr(struct sockaddr *, u_int16_t); -#define KEY_PORTTOSADDR(saddr, port) \ - key_porttosaddr((struct sockaddr *)(saddr), (port)) -static struct mbuf *key_setsadbxsa2 __P((u_int8_t, u_int32_t, u_int32_t)); -static struct mbuf *key_setsadbxpolicy __P((u_int16_t, 
u_int8_t, - u_int32_t)); -static struct seckey *key_dup_keymsg(const struct sadb_key *, u_int, - struct malloc_type *); +static struct mbuf *key_setsadbxsa2(u_int8_t, u_int32_t, u_int32_t); +static struct mbuf *key_setsadbxsareplay(u_int32_t); +static struct mbuf *key_setsadbxpolicy(u_int16_t, u_int8_t, + u_int32_t, u_int32_t); +static struct seckey *key_dup_keymsg(const struct sadb_key *, size_t, + struct malloc_type *); static struct seclifetime *key_dup_lifemsg(const struct sadb_lifetime *src, - struct malloc_type *type); -#ifdef INET6 -static int key_ismyaddr6 __P((struct sockaddr_in6 *)); -#endif + struct malloc_type *); /* flags for key_cmpsaidx() */ #define CMP_HEAD 1 /* protocol, addresses. */ #define CMP_MODE_REQID 2 /* additionally HEAD, reqid, mode. */ #define CMP_REQID 3 /* additionally HEAD, reaid. */ #define CMP_EXACTLY 4 /* all elements. */ -static int key_cmpsaidx - __P((const struct secasindex *, const struct secasindex *, int)); - -static int key_cmpspidx_exactly - __P((struct secpolicyindex *, struct secpolicyindex *)); -static int key_cmpspidx_withmask - __P((struct secpolicyindex *, struct secpolicyindex *)); -static int key_sockaddrcmp __P((const struct sockaddr *, const struct sockaddr *, int)); -static int key_bbcmp __P((const void *, const void *, u_int)); -static u_int16_t key_satype2proto __P((u_int8_t)); -static u_int8_t key_proto2satype __P((u_int16_t)); - -static int key_getspi __P((struct socket *, struct mbuf *, - const struct sadb_msghdr *)); -static u_int32_t key_do_getnewspi __P((struct sadb_spirange *, - struct secasindex *)); -static int key_update __P((struct socket *, struct mbuf *, - const struct sadb_msghdr *)); -#ifdef IPSEC_DOSEQCHECK -static struct secasvar *key_getsavbyseq __P((struct secashead *, u_int32_t)); -#endif -static int key_add __P((struct socket *, struct mbuf *, - const struct sadb_msghdr *)); -static int key_setident __P((struct secashead *, struct mbuf *, - const struct sadb_msghdr *)); -static struct mbuf 
*key_getmsgbuf_x1 __P((struct mbuf *, - const struct sadb_msghdr *)); -static int key_delete __P((struct socket *, struct mbuf *, - const struct sadb_msghdr *)); -static int key_get __P((struct socket *, struct mbuf *, - const struct sadb_msghdr *)); - -static void key_getcomb_setlifetime __P((struct sadb_comb *)); -static struct mbuf *key_getcomb_esp __P((void)); -static struct mbuf *key_getcomb_ah __P((void)); -static struct mbuf *key_getcomb_ipcomp __P((void)); -static struct mbuf *key_getprop __P((const struct secasindex *)); - -static int key_acquire __P((const struct secasindex *, struct secpolicy *)); -static struct secacq *key_newacq __P((const struct secasindex *)); -static struct secacq *key_getacq __P((const struct secasindex *)); -static struct secacq *key_getacqbyseq __P((u_int32_t)); -static struct secspacq *key_newspacq __P((struct secpolicyindex *)); -static struct secspacq *key_getspacq __P((struct secpolicyindex *)); -static int key_acquire2 __P((struct socket *, struct mbuf *, - const struct sadb_msghdr *)); -static int key_register __P((struct socket *, struct mbuf *, - const struct sadb_msghdr *)); -static int key_expire __P((struct secasvar *)); -static int key_flush __P((struct socket *, struct mbuf *, - const struct sadb_msghdr *)); -static int key_dump __P((struct socket *, struct mbuf *, - const struct sadb_msghdr *)); -static int key_promisc __P((struct socket *, struct mbuf *, - const struct sadb_msghdr *)); -static int key_senderror __P((struct socket *, struct mbuf *, int)); -static int key_validate_ext __P((const struct sadb_ext *, int)); -static int key_align __P((struct mbuf *, struct sadb_msghdr *)); -static struct mbuf *key_setlifetime(struct seclifetime *src, - u_int16_t exttype); -static struct mbuf *key_setkey(struct seckey *src, u_int16_t exttype); - -#if 0 -static const char *key_getfqdn __P((void)); -static const char *key_getuserfqdn __P((void)); -#endif -static void key_sa_chgstate __P((struct secasvar *, u_int8_t)); 
-static struct mbuf *key_alloc_mbuf __P((int)); - -static __inline void -sa_initref(struct secasvar *sav) -{ +static int key_cmpsaidx(const struct secasindex *, + const struct secasindex *, int); +static int key_cmpspidx_exactly(struct secpolicyindex *, + struct secpolicyindex *); +static int key_cmpspidx_withmask(struct secpolicyindex *, + struct secpolicyindex *); +static int key_bbcmp(const void *, const void *, u_int); +static uint8_t key_satype2proto(uint8_t); +static uint8_t key_proto2satype(uint8_t); + +static int key_getspi(struct socket *, struct mbuf *, + const struct sadb_msghdr *); +static uint32_t key_do_getnewspi(struct sadb_spirange *, struct secasindex *); +static int key_update(struct socket *, struct mbuf *, + const struct sadb_msghdr *); +static int key_add(struct socket *, struct mbuf *, + const struct sadb_msghdr *); +static int key_setident(struct secashead *, const struct sadb_msghdr *); +static struct mbuf *key_getmsgbuf_x1(struct mbuf *, + const struct sadb_msghdr *); +static int key_delete(struct socket *, struct mbuf *, + const struct sadb_msghdr *); +static int key_delete_all(struct socket *, struct mbuf *, + const struct sadb_msghdr *, struct secasindex *); +static void key_delete_xform(const struct xformsw *); +static int key_get(struct socket *, struct mbuf *, + const struct sadb_msghdr *); + +static void key_getcomb_setlifetime(struct sadb_comb *); +static struct mbuf *key_getcomb_ealg(void); +static struct mbuf *key_getcomb_ah(void); +static struct mbuf *key_getcomb_ipcomp(void); +static struct mbuf *key_getprop(const struct secasindex *); + +static int key_acquire(const struct secasindex *, struct secpolicy *); +static uint32_t key_newacq(const struct secasindex *, int *); +static uint32_t key_getacq(const struct secasindex *, int *); +static int key_acqdone(const struct secasindex *, uint32_t); +static int key_acqreset(uint32_t); +static struct secspacq *key_newspacq(struct secpolicyindex *); +static struct secspacq 
*key_getspacq(struct secpolicyindex *); +static int key_acquire2(struct socket *, struct mbuf *, + const struct sadb_msghdr *); +static int key_register(struct socket *, struct mbuf *, + const struct sadb_msghdr *); +static int key_expire(struct secasvar *, int); +static int key_flush(struct socket *, struct mbuf *, + const struct sadb_msghdr *); +static int key_dump(struct socket *, struct mbuf *, + const struct sadb_msghdr *); +static int key_promisc(struct socket *, struct mbuf *, + const struct sadb_msghdr *); +static int key_senderror(struct socket *, struct mbuf *, int); +static int key_validate_ext(const struct sadb_ext *, int); +static int key_align(struct mbuf *, struct sadb_msghdr *); +static struct mbuf *key_setlifetime(struct seclifetime *, uint16_t); +static struct mbuf *key_setkey(struct seckey *, uint16_t); +static int xform_init(struct secasvar *, u_short); + +#define DBG_IPSEC_INITREF(t, p) do { \ + refcount_init(&(p)->refcnt, 1); \ + KEYDBG(KEY_STAMP, \ + printf("%s: Initialize refcnt %s(%p) = %u\n", \ + __func__, #t, (p), (p)->refcnt)); \ +} while (0) +#define DBG_IPSEC_ADDREF(t, p) do { \ + refcount_acquire(&(p)->refcnt); \ + KEYDBG(KEY_STAMP, \ + printf("%s: Acquire refcnt %s(%p) -> %u\n", \ + __func__, #t, (p), (p)->refcnt)); \ +} while (0) +#define DBG_IPSEC_DELREF(t, p) do { \ + KEYDBG(KEY_STAMP, \ + printf("%s: Release refcnt %s(%p) -> %u\n", \ + __func__, #t, (p), (p)->refcnt - 1)); \ + refcount_release(&(p)->refcnt); \ +} while (0) - refcount_init(&sav->refcnt, 1); -} -static __inline void -sa_addref(struct secasvar *sav) -{ +#define IPSEC_INITREF(t, p) refcount_init(&(p)->refcnt, 1) +#define IPSEC_ADDREF(t, p) refcount_acquire(&(p)->refcnt) +#define IPSEC_DELREF(t, p) refcount_release(&(p)->refcnt) - refcount_acquire(&sav->refcnt); - IPSEC_ASSERT(sav->refcnt != 0, ("SA refcnt overflow")); -} -static __inline int -sa_delref(struct secasvar *sav) -{ +#define SP_INITREF(p) IPSEC_INITREF(SP, p) +#define SP_ADDREF(p) IPSEC_ADDREF(SP, p) 
+#define SP_DELREF(p) IPSEC_DELREF(SP, p) - IPSEC_ASSERT(sav->refcnt > 0, ("SA refcnt underflow")); - return (refcount_release(&sav->refcnt)); -} +#define SAH_INITREF(p) IPSEC_INITREF(SAH, p) +#define SAH_ADDREF(p) IPSEC_ADDREF(SAH, p) +#define SAH_DELREF(p) IPSEC_DELREF(SAH, p) -#define SP_ADDREF(p) do { \ - (p)->refcnt++; \ - IPSEC_ASSERT((p)->refcnt != 0, ("SP refcnt overflow")); \ -} while (0) -#define SP_DELREF(p) do { \ - IPSEC_ASSERT((p)->refcnt > 0, ("SP refcnt underflow")); \ - (p)->refcnt--; \ -} while (0) - +#define SAV_INITREF(p) IPSEC_INITREF(SAV, p) +#define SAV_ADDREF(p) IPSEC_ADDREF(SAV, p) +#define SAV_DELREF(p) IPSEC_DELREF(SAV, p) /* * Update the refcnt while holding the SPTREE lock. @@ -588,9 +735,8 @@ sa_delref(struct secasvar *sav) void key_addref(struct secpolicy *sp) { - SPTREE_LOCK(); + SP_ADDREF(sp); - SPTREE_UNLOCK(); } /* @@ -603,60 +749,54 @@ key_havesp(u_int dir) { return (dir == IPSEC_DIR_INBOUND || dir == IPSEC_DIR_OUTBOUND ? - LIST_FIRST(&V_sptree[dir]) != NULL : 1); + TAILQ_FIRST(&V_sptree[dir]) != NULL : 1); } /* %%% IPsec policy management */ /* - * allocating a SP for OUTBOUND or INBOUND packet. - * Must call key_freesp() later. - * OUT: NULL: not found - * others: found and return the pointer. + * Return current SPDB generation. 
*/ -struct secpolicy * -key_allocsp(struct secpolicyindex *spidx, u_int dir, const char* where, int tag) +uint32_t +key_getspgen(void) { - struct secpolicy *sp; - IPSEC_ASSERT(spidx != NULL, ("null spidx")); - IPSEC_ASSERT(dir == IPSEC_DIR_INBOUND || dir == IPSEC_DIR_OUTBOUND, - ("invalid direction %u", dir)); - - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP %s from %s:%u\n", __func__, where, tag)); + return (V_sp_genid); +} - /* get a SP entry */ - KEYDEBUG(KEYDEBUG_IPSEC_DATA, - printf("*** objects\n"); - kdebug_secpolicyindex(spidx)); +void +key_bumpspgen(void) +{ - SPTREE_LOCK(); - LIST_FOREACH(sp, &V_sptree[dir], chain) { - KEYDEBUG(KEYDEBUG_IPSEC_DATA, - printf("*** in SPD\n"); - kdebug_secpolicyindex(&sp->spidx)); + V_sp_genid++; +} - if (sp->state == IPSEC_SPSTATE_DEAD) - continue; - if (key_cmpspidx_withmask(&sp->spidx, spidx)) - goto found; - } - sp = NULL; -found: - if (sp) { - /* sanity check */ - KEY_CHKSPDIR(sp->spidx.dir, dir, __func__); +static int +key_checksockaddrs(struct sockaddr *src, struct sockaddr *dst) +{ - /* found a SPD entry */ - sp->lastused = time_second; - SP_ADDREF(sp); + /* family match */ + if (src->sa_family != dst->sa_family) + return (EINVAL); + /* sa_len match */ + if (src->sa_len != dst->sa_len) + return (EINVAL); + switch (src->sa_family) { +#ifdef INET + case AF_INET: + if (src->sa_len != sizeof(struct sockaddr_in)) + return (EINVAL); + break; +#endif +#ifdef INET6 + case AF_INET6: + if (src->sa_len != sizeof(struct sockaddr_in6)) + return (EINVAL); + break; +#endif + default: + return (EAFNOSUPPORT); } - SPTREE_UNLOCK(); - - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP %s return SP:%p (ID=%u) refcnt %u\n", __func__, - sp, sp ? sp->id : 0, sp ? sp->refcnt : 0)); - return sp; + return (0); } /* @@ -666,577 +806,415 @@ found: * others: found and return the pointer. 
*/ struct secpolicy * -key_allocsp2(u_int32_t spi, - union sockaddr_union *dst, - u_int8_t proto, - u_int dir, - const char* where, int tag) +key_allocsp(struct secpolicyindex *spidx, u_int dir) { + SPTREE_RLOCK_TRACKER; struct secpolicy *sp; - IPSEC_ASSERT(dst != NULL, ("null dst")); + IPSEC_ASSERT(spidx != NULL, ("null spidx")); IPSEC_ASSERT(dir == IPSEC_DIR_INBOUND || dir == IPSEC_DIR_OUTBOUND, ("invalid direction %u", dir)); - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP %s from %s:%u\n", __func__, where, tag)); - - /* get a SP entry */ - KEYDEBUG(KEYDEBUG_IPSEC_DATA, - printf("*** objects\n"); - printf("spi %u proto %u dir %u\n", spi, proto, dir); - kdebug_sockaddr(&dst->sa)); - - SPTREE_LOCK(); - LIST_FOREACH(sp, &V_sptree[dir], chain) { - KEYDEBUG(KEYDEBUG_IPSEC_DATA, - printf("*** in SPD\n"); - kdebug_secpolicyindex(&sp->spidx)); - - if (sp->state == IPSEC_SPSTATE_DEAD) - continue; - /* compare simple values, then dst address */ - if (sp->spidx.ul_proto != proto) - continue; - /* NB: spi's must exist and match */ - if (!sp->req || !sp->req->sav || sp->req->sav->spi != spi) - continue; - if (key_sockaddrcmp(&sp->spidx.dst.sa, &dst->sa, 1) == 0) - goto found; + SPTREE_RLOCK(); + TAILQ_FOREACH(sp, &V_sptree[dir], chain) { + if (key_cmpspidx_withmask(&sp->spidx, spidx)) { + SP_ADDREF(sp); + break; + } } - sp = NULL; -found: - if (sp) { - /* sanity check */ - KEY_CHKSPDIR(sp->spidx.dir, dir, __func__); + SPTREE_RUNLOCK(); - /* found a SPD entry */ + if (sp != NULL) { /* found a SPD entry */ sp->lastused = time_second; - SP_ADDREF(sp); + KEYDBG(IPSEC_STAMP, + printf("%s: return SP(%p)\n", __func__, sp)); + KEYDBG(IPSEC_DATA, kdebug_secpolicy(sp)); + } else { + KEYDBG(IPSEC_DATA, + printf("%s: lookup failed for ", __func__); + kdebug_secpolicyindex(spidx, NULL)); } - SPTREE_UNLOCK(); - - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP %s return SP:%p (ID=%u) refcnt %u\n", __func__, - sp, sp ? sp->id : 0, sp ? 
sp->refcnt : 0)); - return sp; + return (sp); } -#if 0 /* - * return a policy that matches this particular inbound packet. - * XXX slow + * Allocating an SA entry for an *INBOUND* or *OUTBOUND* TCP packet, signed + * or should be signed by MD5 signature. + * We don't use key_allocsa() for such lookups, because we don't know SPI. + * Unlike ESP and AH protocols, SPI isn't transmitted in the TCP header with + * signed packet. We use SADB only as storage for password. + * OUT: positive: corresponding SA for given saidx found. + * NULL: SA not found */ -struct secpolicy * -key_gettunnel(const struct sockaddr *osrc, - const struct sockaddr *odst, - const struct sockaddr *isrc, - const struct sockaddr *idst, - const char* where, int tag) +struct secasvar * +key_allocsa_tcpmd5(struct secasindex *saidx) { - struct secpolicy *sp; - const int dir = IPSEC_DIR_INBOUND; - struct ipsecrequest *r1, *r2, *p; - struct secpolicyindex spidx; - - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP %s from %s:%u\n", __func__, where, tag)); - - if (isrc->sa_family != idst->sa_family) { - ipseclog((LOG_ERR, "%s: protocol family mismatched %d != %d\n.", - __func__, isrc->sa_family, idst->sa_family)); - sp = NULL; - goto done; - } + SAHTREE_RLOCK_TRACKER; + struct secashead *sah; + struct secasvar *sav; - SPTREE_LOCK(); - LIST_FOREACH(sp, &V_sptree[dir], chain) { - if (sp->state == IPSEC_SPSTATE_DEAD) + IPSEC_ASSERT(saidx->proto == IPPROTO_TCP, + ("unexpected security protocol %u", saidx->proto)); + IPSEC_ASSERT(saidx->mode == IPSEC_MODE_TCPMD5, + ("unexpected mode %u", saidx->mode)); + + SAHTREE_RLOCK(); + LIST_FOREACH(sah, SAHADDRHASH_HASH(saidx), addrhash) { + KEYDBG(IPSEC_DUMP, + printf("%s: checking SAH\n", __func__); + kdebug_secash(sah, " ")); + if (sah->saidx.proto != IPPROTO_TCP) continue; - - r1 = r2 = NULL; - for (p = sp->req; p; p = p->next) { - if (p->saidx.mode != IPSEC_MODE_TUNNEL) - continue; - - r1 = r2; - r2 = p; - - if (!r1) { - /* here we look at address matches only */ - spidx 
= sp->spidx; - if (isrc->sa_len > sizeof(spidx.src) || - idst->sa_len > sizeof(spidx.dst)) - continue; - bcopy(isrc, &spidx.src, isrc->sa_len); - bcopy(idst, &spidx.dst, idst->sa_len); - if (!key_cmpspidx_withmask(&sp->spidx, &spidx)) - continue; - } else { - if (key_sockaddrcmp(&r1->saidx.src.sa, isrc, 0) || - key_sockaddrcmp(&r1->saidx.dst.sa, idst, 0)) - continue; - } - - if (key_sockaddrcmp(&r2->saidx.src.sa, osrc, 0) || - key_sockaddrcmp(&r2->saidx.dst.sa, odst, 0)) - continue; - - goto found; - } + if (!key_sockaddrcmp(&saidx->dst.sa, &sah->saidx.dst.sa, 0)) + break; } - sp = NULL; -found: - if (sp) { - sp->lastused = time_second; - SP_ADDREF(sp); + if (sah != NULL) { + if (V_key_preferred_oldsa) + sav = TAILQ_LAST(&sah->savtree_alive, secasvar_queue); + else + sav = TAILQ_FIRST(&sah->savtree_alive); + if (sav != NULL) + SAV_ADDREF(sav); + } else + sav = NULL; + SAHTREE_RUNLOCK(); + + if (sav != NULL) { + KEYDBG(IPSEC_STAMP, + printf("%s: return SA(%p)\n", __func__, sav)); + KEYDBG(IPSEC_DATA, kdebug_secasv(sav)); + } else { + KEYDBG(IPSEC_STAMP, + printf("%s: SA not found\n", __func__)); + KEYDBG(IPSEC_DATA, kdebug_secasindex(saidx, NULL)); } - SPTREE_UNLOCK(); -done: - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP %s return SP:%p (ID=%u) refcnt %u\n", __func__, - sp, sp ? sp->id : 0, sp ? sp->refcnt : 0)); - return sp; + return (sav); } -#endif /* - * allocating an SA entry for an *OUTBOUND* packet. - * checking each request entries in SP, and acquire an SA if need. - * OUT: 0: there are valid requests. - * ENOENT: policy may be valid, but SA with REQUIRE is on acquiring. + * Allocating an SA entry for an *OUTBOUND* packet. + * OUT: positive: corresponding SA for given saidx found. + * NULL: SA not found, but will be acquired, check *error + * for acquiring status. 
*/ -int -key_checkrequest(struct ipsecrequest *isr, const struct secasindex *saidx) +struct secasvar * +key_allocsa_policy(struct secpolicy *sp, const struct secasindex *saidx, + int *error) { - u_int level; - int error; + SAHTREE_RLOCK_TRACKER; + struct secashead *sah; struct secasvar *sav; - IPSEC_ASSERT(isr != NULL, ("null isr")); IPSEC_ASSERT(saidx != NULL, ("null saidx")); IPSEC_ASSERT(saidx->mode == IPSEC_MODE_TRANSPORT || saidx->mode == IPSEC_MODE_TUNNEL, ("unexpected policy %u", saidx->mode)); - /* - * XXX guard against protocol callbacks from the crypto - * thread as they reference ipsecrequest.sav which we - * temporarily null out below. Need to rethink how we - * handle bundled SA's in the callback thread. - */ - IPSECREQUEST_LOCK_ASSERT(isr); - - /* get current level */ - level = ipsec_get_reqlevel(isr); - /* * We check new SA in the IPsec request because a different * SA may be involved each time this request is checked, either * because new SAs are being configured, or this request is * associated with an unconnected datagram socket, or this request * is associated with a system default policy. - * - * key_allocsa_policy should allocate the oldest SA available. - * See key_do_allocsa_policy(), and draft-jenkins-ipsec-rekeying-03.txt. */ - sav = key_allocsa_policy(saidx); - if (sav != isr->sav) { - /* SA need to be updated. */ - if (!IPSECREQUEST_UPGRADE(isr)) { - /* Kick everyone off. */ - IPSECREQUEST_UNLOCK(isr); - IPSECREQUEST_WLOCK(isr); - } - if (isr->sav != NULL) - KEY_FREESAV(&isr->sav); - isr->sav = sav; - IPSECREQUEST_DOWNGRADE(isr); - } else if (sav != NULL) - KEY_FREESAV(&sav); - - /* When there is SA. 
*/ - if (isr->sav != NULL) { - if (isr->sav->state != SADB_SASTATE_MATURE && - isr->sav->state != SADB_SASTATE_DYING) - return EINVAL; - return 0; - } + SAHTREE_RLOCK(); + LIST_FOREACH(sah, SAHADDRHASH_HASH(saidx), addrhash) { + KEYDBG(IPSEC_DUMP, + printf("%s: checking SAH\n", __func__); + kdebug_secash(sah, " ")); + if (key_cmpsaidx(&sah->saidx, saidx, CMP_MODE_REQID)) + break; - /* there is no SA */ - error = key_acquire(saidx, isr->sp); - if (error != 0) { - /* XXX What should I do ? */ - ipseclog((LOG_DEBUG, "%s: error %d returned from key_acquire\n", - __func__, error)); - return error; } + if (sah != NULL) { + /* + * Allocate the oldest SA available according to + * draft-jenkins-ipsec-rekeying-03. + */ + if (V_key_preferred_oldsa) + sav = TAILQ_LAST(&sah->savtree_alive, secasvar_queue); + else + sav = TAILQ_FIRST(&sah->savtree_alive); + if (sav != NULL) + SAV_ADDREF(sav); + } else + sav = NULL; + SAHTREE_RUNLOCK(); - if (level != IPSEC_LEVEL_REQUIRE) { - /* XXX sigh, the interface to this routine is botched */ - IPSEC_ASSERT(isr->sav == NULL, ("unexpected SA")); - return 0; - } else { - return ENOENT; + if (sav != NULL) { + *error = 0; + KEYDBG(IPSEC_STAMP, + printf("%s: chosen SA(%p) for SP(%p)\n", __func__, + sav, sp)); + KEYDBG(IPSEC_DATA, kdebug_secasv(sav)); + return (sav); /* return referenced SA */ } + + /* there is no SA */ + *error = key_acquire(saidx, sp); + if ((*error) != 0) + ipseclog((LOG_DEBUG, + "%s: error %d returned from key_acquire()\n", + __func__, *error)); + KEYDBG(IPSEC_STAMP, + printf("%s: acquire SA for SP(%p), error %d\n", + __func__, sp, *error)); + KEYDBG(IPSEC_DATA, kdebug_secasindex(saidx, NULL)); + return (NULL); } /* - * allocating a SA for policy entry from SAD. - * NOTE: searching SAD of aliving state. - * OUT: NULL: not found. - * others: found and return the pointer. + * allocating a usable SA entry for a *INBOUND* packet. + * Must call key_freesav() later. + * OUT: positive: pointer to a usable sav (i.e. 
MATURE or DYING state). + * NULL: not found, or error occurred. + * + * According to RFC 2401 SA is uniquely identified by a triple SPI, + * destination address, and security protocol. But according to RFC 4301, + * SPI by itself suffices to specify an SA. + * + * Note that, however, we do need to keep source address in IPsec SA. + * IKE specification and PF_KEY specification do assume that we + * keep source address in IPsec SA. We see a tricky situation here. */ -static struct secasvar * -key_allocsa_policy(const struct secasindex *saidx) +struct secasvar * +key_allocsa(union sockaddr_union *dst, uint8_t proto, uint32_t spi) { -#define N(a) _ARRAYLEN(a) - struct secashead *sah; + SAHTREE_RLOCK_TRACKER; struct secasvar *sav; - u_int stateidx, arraysize; - const u_int *state_valid; - state_valid = NULL; /* silence gcc */ - arraysize = 0; /* silence gcc */ + IPSEC_ASSERT(proto == IPPROTO_ESP || proto == IPPROTO_AH || + proto == IPPROTO_IPCOMP, ("unexpected security protocol %u", + proto)); - SAHTREE_LOCK(); - LIST_FOREACH(sah, &V_sahtree, chain) { - if (sah->state == SADB_SASTATE_DEAD) - continue; - if (key_cmpsaidx(&sah->saidx, saidx, CMP_MODE_REQID)) { - if (V_key_preferred_oldsa) { - state_valid = saorder_state_valid_prefer_old; - arraysize = N(saorder_state_valid_prefer_old); - } else { - state_valid = saorder_state_valid_prefer_new; - arraysize = N(saorder_state_valid_prefer_new); - } + SAHTREE_RLOCK(); + LIST_FOREACH(sav, SAVHASH_HASH(spi), spihash) { + if (sav->spi == spi) break; - } } - SAHTREE_UNLOCK(); - if (sah == NULL) - return NULL; - - /* search valid state */ - for (stateidx = 0; stateidx < arraysize; stateidx++) { - sav = key_do_allocsa_policy(sah, state_valid[stateidx]); - if (sav != NULL) - return sav; + /* + * We use single SPI namespace for all protocols, so it is + * impossible to have SPI duplicates in the SAVHASH. 
+ */ + if (sav != NULL) { + if (sav->state != SADB_SASTATE_LARVAL && + sav->sah->saidx.proto == proto && + key_sockaddrcmp(&dst->sa, + &sav->sah->saidx.dst.sa, 0) == 0) + SAV_ADDREF(sav); + else + sav = NULL; } + SAHTREE_RUNLOCK(); - return NULL; -#undef N + if (sav == NULL) { + KEYDBG(IPSEC_STAMP, + char buf[IPSEC_ADDRSTRLEN]; + printf("%s: SA not found for spi %u proto %u dst %s\n", + __func__, ntohl(spi), proto, ipsec_address(dst, buf, + sizeof(buf)))); + } else { + KEYDBG(IPSEC_STAMP, + printf("%s: return SA(%p)\n", __func__, sav)); + KEYDBG(IPSEC_DATA, kdebug_secasv(sav)); + } + return (sav); } -/* - * searching SAD with direction, protocol, mode and state. - * called by key_allocsa_policy(). - * OUT: - * NULL : not found - * others : found, pointer to a SA. - */ -static struct secasvar * -key_do_allocsa_policy(struct secashead *sah, u_int state) +struct secasvar * +key_allocsa_tunnel(union sockaddr_union *src, union sockaddr_union *dst, + uint8_t proto) { - struct secasvar *sav, *nextsav, *candidate, *d; - - /* initilize */ - candidate = NULL; - - SAHTREE_LOCK(); - for (sav = LIST_FIRST(&sah->savtree[state]); - sav != NULL; - sav = nextsav) { + SAHTREE_RLOCK_TRACKER; + struct secasindex saidx; + struct secashead *sah; + struct secasvar *sav; - nextsav = LIST_NEXT(sav, chain); + IPSEC_ASSERT(src != NULL, ("null src address")); + IPSEC_ASSERT(dst != NULL, ("null dst address")); - /* sanity check */ - KEY_CHKSASTATE(sav->state, state, __func__); + KEY_SETSECASIDX(proto, IPSEC_MODE_TUNNEL, 0, &src->sa, + &dst->sa, &saidx); - /* initialize */ - if (candidate == NULL) { - candidate = sav; + sav = NULL; + SAHTREE_RLOCK(); + LIST_FOREACH(sah, SAHADDRHASH_HASH(&saidx), addrhash) { + if (IPSEC_MODE_TUNNEL != sah->saidx.mode) continue; - } - - /* Which SA is the better ? */ - - IPSEC_ASSERT(candidate->lft_c != NULL, - ("null candidate lifetime")); - IPSEC_ASSERT(sav->lft_c != NULL, ("null sav lifetime")); - - /* What the best method is to compare ? 
*/ - if (V_key_preferred_oldsa) { - if (candidate->lft_c->addtime > - sav->lft_c->addtime) { - candidate = sav; - } + if (proto != sah->saidx.proto) continue; - /*NOTREACHED*/ + if (key_sockaddrcmp(&src->sa, &sah->saidx.src.sa, 0) != 0) + continue; + if (key_sockaddrcmp(&dst->sa, &sah->saidx.dst.sa, 0) != 0) + continue; + /* XXXAE: is key_preferred_oldsa reasonably?*/ + if (V_key_preferred_oldsa) + sav = TAILQ_LAST(&sah->savtree_alive, secasvar_queue); + else + sav = TAILQ_FIRST(&sah->savtree_alive); + if (sav != NULL) { + SAV_ADDREF(sav); + break; } + } + SAHTREE_RUNLOCK(); + KEYDBG(IPSEC_STAMP, + printf("%s: return SA(%p)\n", __func__, sav)); + if (sav != NULL) + KEYDBG(IPSEC_DATA, kdebug_secasv(sav)); + return (sav); +} - /* preferred new sa rather than old sa */ - if (candidate->lft_c->addtime < - sav->lft_c->addtime) { - d = candidate; - candidate = sav; - } else - d = sav; +/* + * Must be called after calling key_allocsp(). + */ +void +key_freesp(struct secpolicy **spp) +{ + struct secpolicy *sp = *spp; - /* - * prepared to delete the SA when there is more - * suitable candidate and the lifetime of the SA is not - * permanent. - */ - if (d->lft_h->addtime != 0) { - struct mbuf *m, *result; - u_int8_t satype; + IPSEC_ASSERT(sp != NULL, ("null sp")); + if (SP_DELREF(sp) == 0) + return; - key_sa_chgstate(d, SADB_SASTATE_DEAD); + KEYDBG(IPSEC_STAMP, + printf("%s: last reference to SP(%p)\n", __func__, sp)); + KEYDBG(IPSEC_DATA, kdebug_secpolicy(sp)); - IPSEC_ASSERT(d->refcnt > 0, ("bogus ref count")); + *spp = NULL; + while (sp->tcount > 0) + ipsec_delisr(sp->req[--sp->tcount]); + free(sp, M_IPSEC_SP); +} - satype = key_proto2satype(d->sah->saidx.proto); - if (satype == 0) - goto msgfail; +static void +key_unlink(struct secpolicy *sp) +{ - m = key_setsadbmsg(SADB_DELETE, 0, - satype, 0, 0, d->refcnt - 1); - if (!m) - goto msgfail; - result = m; - - /* set sadb_address for saidx's. 
*/ - m = key_setsadbaddr(SADB_EXT_ADDRESS_SRC, - &d->sah->saidx.src.sa, - d->sah->saidx.src.sa.sa_len << 3, - IPSEC_ULPROTO_ANY); - if (!m) - goto msgfail; - m_cat(result, m); - - /* set sadb_address for saidx's. */ - m = key_setsadbaddr(SADB_EXT_ADDRESS_DST, - &d->sah->saidx.dst.sa, - d->sah->saidx.dst.sa.sa_len << 3, - IPSEC_ULPROTO_ANY); - if (!m) - goto msgfail; - m_cat(result, m); - - /* create SA extension */ - m = key_setsadbsa(d); - if (!m) - goto msgfail; - m_cat(result, m); - - if (result->m_len < sizeof(struct sadb_msg)) { - result = m_pullup(result, - sizeof(struct sadb_msg)); - if (result == NULL) - goto msgfail; - } - - result->m_pkthdr.len = 0; - for (m = result; m; m = m->m_next) - result->m_pkthdr.len += m->m_len; - mtod(result, struct sadb_msg *)->sadb_msg_len = - PFKEY_UNIT64(result->m_pkthdr.len); - - if (key_sendup_mbuf(NULL, result, - KEY_SENDUP_REGISTERED)) - goto msgfail; - msgfail: - KEY_FREESAV(&d); - } - } - if (candidate) { - sa_addref(candidate); - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP %s cause refcnt++:%d SA:%p\n", - __func__, candidate->refcnt, candidate)); - } - SAHTREE_UNLOCK(); - - return candidate; -} + IPSEC_ASSERT(sp->spidx.dir == IPSEC_DIR_INBOUND || + sp->spidx.dir == IPSEC_DIR_OUTBOUND, + ("invalid direction %u", sp->spidx.dir)); + SPTREE_UNLOCK_ASSERT(); + + KEYDBG(KEY_STAMP, + printf("%s: SP(%p)\n", __func__, sp)); + SPTREE_WLOCK(); + if (sp->state != IPSEC_SPSTATE_ALIVE) { + /* SP is already unlinked */ + SPTREE_WUNLOCK(); + return; + } + sp->state = IPSEC_SPSTATE_DEAD; + TAILQ_REMOVE(&V_sptree[sp->spidx.dir], sp, chain); + LIST_REMOVE(sp, idhash); + V_sp_genid++; + SPTREE_WUNLOCK(); + key_freesp(&sp); +} /* - * allocating a usable SA entry for a *INBOUND* packet. - * Must call key_freesav() later. - * OUT: positive: pointer to a usable sav (i.e. MATURE or DYING state). - * NULL: not found, or error occured. - * - * In the comparison, no source address is used--for RFC2401 conformance. 
- * To quote, from section 4.1: - * A security association is uniquely identified by a triple consisting - * of a Security Parameter Index (SPI), an IP Destination Address, and a - * security protocol (AH or ESP) identifier. - * Note that, however, we do need to keep source address in IPsec SA. - * IKE specification and PF_KEY specification do assume that we - * keep source address in IPsec SA. We see a tricky situation here. + * insert a secpolicy into the SP database. Lower priorities first */ -struct secasvar * -key_allocsa( - union sockaddr_union *dst, - u_int proto, - u_int32_t spi, - const char* where, int tag) +static void +key_insertsp(struct secpolicy *newsp) { - struct secashead *sah; - struct secasvar *sav; - u_int stateidx, arraysize, state; - const u_int *saorder_state_valid; - int chkport; - - IPSEC_ASSERT(dst != NULL, ("null dst address")); - - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP %s from %s:%u\n", __func__, where, tag)); - -#ifdef IPSEC_NAT_T - chkport = (dst->sa.sa_family == AF_INET && - dst->sa.sa_len == sizeof(struct sockaddr_in) && - dst->sin.sin_port != 0); -#else - chkport = 0; -#endif + struct secpolicy *sp; - /* - * searching SAD. - * XXX: to be checked internal IP header somewhere. Also when - * IPsec tunnel packet is received. But ESP tunnel mode is - * encrypted so we can't check internal IP header. 
- */ - SAHTREE_LOCK(); - if (V_key_preferred_oldsa) { - saorder_state_valid = saorder_state_valid_prefer_old; - arraysize = _ARRAYLEN(saorder_state_valid_prefer_old); - } else { - saorder_state_valid = saorder_state_valid_prefer_new; - arraysize = _ARRAYLEN(saorder_state_valid_prefer_new); - } - LIST_FOREACH(sah, &V_sahtree, chain) { - /* search valid state */ - for (stateidx = 0; stateidx < arraysize; stateidx++) { - state = saorder_state_valid[stateidx]; - LIST_FOREACH(sav, &sah->savtree[state], chain) { - /* sanity check */ - KEY_CHKSASTATE(sav->state, state, __func__); - /* do not return entries w/ unusable state */ - if (sav->state != SADB_SASTATE_MATURE && - sav->state != SADB_SASTATE_DYING) - continue; - if (proto != sav->sah->saidx.proto) - continue; - if (spi != sav->spi) - continue; -#if 0 /* don't check src */ - /* check src address */ - if (key_sockaddrcmp(&src->sa, &sav->sah->saidx.src.sa, chkport) != 0) - continue; -#endif - /* check dst address */ - if (key_sockaddrcmp(&dst->sa, &sav->sah->saidx.dst.sa, chkport) != 0) - continue; - sa_addref(sav); - goto done; - } + SPTREE_WLOCK_ASSERT(); + TAILQ_FOREACH(sp, &V_sptree[newsp->spidx.dir], chain) { + if (newsp->priority < sp->priority) { + TAILQ_INSERT_BEFORE(sp, newsp, chain); + goto done; } } - sav = NULL; + TAILQ_INSERT_TAIL(&V_sptree[newsp->spidx.dir], newsp, chain); done: - SAHTREE_UNLOCK(); - - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP %s return SA:%p; refcnt %u\n", __func__, - sav, sav ? sav->refcnt : 0)); - return sav; + LIST_INSERT_HEAD(SPHASH_HASH(newsp->id), newsp, idhash); + newsp->state = IPSEC_SPSTATE_ALIVE; + V_sp_genid++; } /* - * Must be called after calling key_allocsp(). - * For both the packet without socket and key_freeso(). + * Insert a bunch of VTI secpolicies into the SPDB. + * We keep VTI policies in the separate list due to following reasons: + * 1) they should be immutable to user's or some deamon's attempts to + * delete. 
The only way delete such policies - destroy or unconfigure + * corresponding virtual inteface. + * 2) such policies have traffic selector that matches all traffic per + * address family. + * Since all VTI policies have the same priority, we don't care about + * policies order. */ -void -_key_freesp(struct secpolicy **spp, const char* where, int tag) +int +key_register_ifnet(struct secpolicy **spp, u_int count) { - struct secpolicy *sp = *spp; - - IPSEC_ASSERT(sp != NULL, ("null sp")); - - SPTREE_LOCK(); - SP_DELREF(sp); + struct mbuf *m; + u_int i; - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP %s SP:%p (ID=%u) from %s:%u; refcnt now %u\n", - __func__, sp, sp->id, where, tag, sp->refcnt)); + SPTREE_WLOCK(); + /* + * First of try to acquire id for each SP. + */ + for (i = 0; i < count; i++) { + IPSEC_ASSERT(spp[i]->spidx.dir == IPSEC_DIR_INBOUND || + spp[i]->spidx.dir == IPSEC_DIR_OUTBOUND, + ("invalid direction %u", spp[i]->spidx.dir)); - if (sp->refcnt == 0) { - *spp = NULL; - key_delsp(sp); + if ((spp[i]->id = key_getnewspid()) == 0) { + SPTREE_WUNLOCK(); + return (EAGAIN); + } + } + for (i = 0; i < count; i++) { + TAILQ_INSERT_TAIL(&V_sptree_ifnet[spp[i]->spidx.dir], + spp[i], chain); + /* + * NOTE: despite the fact that we keep VTI SP in the + * separate list, SPHASH contains policies from both + * sources. Thus SADB_X_SPDGET will correctly return + * SP by id, because it uses SPHASH for lookups. + */ + LIST_INSERT_HEAD(SPHASH_HASH(spp[i]->id), spp[i], idhash); + spp[i]->state = IPSEC_SPSTATE_IFNET; } - SPTREE_UNLOCK(); + SPTREE_WUNLOCK(); + /* + * Notify user processes about new SP. + */ + for (i = 0; i < count; i++) { + m = key_setdumpsp(spp[i], SADB_X_SPDADD, 0, 0); + if (m != NULL) + key_sendup_mbuf(NULL, m, KEY_SENDUP_ALL); + } + return (0); } -/* - * Must be called after calling key_allocsp(). - * For the packet with socket. 
- */ void -key_freeso(struct socket *so) +key_unregister_ifnet(struct secpolicy **spp, u_int count) { - IPSEC_ASSERT(so != NULL, ("null so")); + struct mbuf *m; + u_int i; - switch (so->so_proto->pr_domain->dom_family) { -#if defined(INET) || defined(INET6) -#ifdef INET - case PF_INET: -#endif -#ifdef INET6 - case PF_INET6: -#endif - { - struct inpcb *pcb = sotoinpcb(so); + SPTREE_WLOCK(); + for (i = 0; i < count; i++) { + IPSEC_ASSERT(spp[i]->spidx.dir == IPSEC_DIR_INBOUND || + spp[i]->spidx.dir == IPSEC_DIR_OUTBOUND, + ("invalid direction %u", spp[i]->spidx.dir)); - /* Does it have a PCB ? */ - if (pcb == NULL) - return; - key_freesp_so(&pcb->inp_sp->sp_in); - key_freesp_so(&pcb->inp_sp->sp_out); - } - break; -#endif /* INET || INET6 */ - default: - ipseclog((LOG_DEBUG, "%s: unknown address family=%d.\n", - __func__, so->so_proto->pr_domain->dom_family)); - return; + if (spp[i]->state != IPSEC_SPSTATE_IFNET) + continue; + spp[i]->state = IPSEC_SPSTATE_DEAD; + TAILQ_REMOVE(&V_sptree_ifnet[spp[i]->spidx.dir], + spp[i], chain); + LIST_REMOVE(spp[i], idhash); } -} + SPTREE_WUNLOCK(); -static void -key_freesp_so(struct secpolicy **sp) -{ - IPSEC_ASSERT(sp != NULL && *sp != NULL, ("null sp")); - - if ((*sp)->policy == IPSEC_POLICY_ENTRUST || - (*sp)->policy == IPSEC_POLICY_BYPASS) - return; - - IPSEC_ASSERT((*sp)->policy == IPSEC_POLICY_IPSEC, - ("invalid policy %u", (*sp)->policy)); - KEY_FREESP(sp); -} - -void -key_addrefsa(struct secasvar *sav, const char* where, int tag) -{ - - IPSEC_ASSERT(sav != NULL, ("null sav")); - IPSEC_ASSERT(sav->refcnt > 0, ("refcount must exist")); - - sa_addref(sav); + for (i = 0; i < count; i++) { + m = key_setdumpsp(spp[i], SADB_X_SPDDELETE, 0, 0); + if (m != NULL) + key_sendup_mbuf(NULL, m, KEY_SENDUP_ALL); + } } /* @@ -1245,58 +1223,57 @@ key_addrefsa(struct secasvar *sav, const char* where, int tag) * for a policy. 
*/ void -key_freesav(struct secasvar **psav, const char* where, int tag) +key_freesav(struct secasvar **psav) { struct secasvar *sav = *psav; IPSEC_ASSERT(sav != NULL, ("null sav")); + if (SAV_DELREF(sav) == 0) + return; - if (sa_delref(sav)) { - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP %s SA:%p (SPI %u) from %s:%u; refcnt now %u\n", - __func__, sav, ntohl(sav->spi), where, tag, sav->refcnt)); - *psav = NULL; - key_delsav(sav); - } else { - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP %s SA:%p (SPI %u) from %s:%u; refcnt now %u\n", - __func__, sav, ntohl(sav->spi), where, tag, sav->refcnt)); - } + KEYDBG(IPSEC_STAMP, + printf("%s: last reference to SA(%p)\n", __func__, sav)); + + *psav = NULL; + key_delsav(sav); } -/* %%% SPD management */ /* - * free security policy entry. + * Unlink SA from SAH and SPI hash under SAHTREE_WLOCK. + * Expect that SA has extra reference due to lookup. + * Release this references, also release SAH reference after unlink. */ static void -key_delsp(struct secpolicy *sp) +key_unlinksav(struct secasvar *sav) { - struct ipsecrequest *isr, *nextisr; - - IPSEC_ASSERT(sp != NULL, ("null sp")); - SPTREE_LOCK_ASSERT(); - - sp->state = IPSEC_SPSTATE_DEAD; - - IPSEC_ASSERT(sp->refcnt == 0, - ("SP with references deleted (refcnt %u)", sp->refcnt)); - - /* remove from SP index */ - if (__LIST_CHAINED(sp)) - LIST_REMOVE(sp, chain); + struct secashead *sah; - for (isr = sp->req; isr != NULL; isr = nextisr) { - if (isr->sav != NULL) { - KEY_FREESAV(&isr->sav); - isr->sav = NULL; - } + KEYDBG(KEY_STAMP, + printf("%s: SA(%p)\n", __func__, sav)); - nextisr = isr->next; - ipsec_delisr(isr); + SAHTREE_UNLOCK_ASSERT(); + SAHTREE_WLOCK(); + if (sav->state == SADB_SASTATE_DEAD) { + /* SA is already unlinked */ + SAHTREE_WUNLOCK(); + return; } - _key_delsp(sp); + /* Unlink from SAH */ + if (sav->state == SADB_SASTATE_LARVAL) + TAILQ_REMOVE(&sav->sah->savtree_larval, sav, chain); + else + TAILQ_REMOVE(&sav->sah->savtree_alive, sav, chain); + /* Unlink from 
SPI hash */ + LIST_REMOVE(sav, spihash); + sav->state = SADB_SASTATE_DEAD; + sah = sav->sah; + SAHTREE_WUNLOCK(); + key_freesav(&sav); + /* Since we are unlinked, release reference to SAH */ + key_freesah(&sah); } +/* %%% SPD management */ /* * search SPD * OUT: NULL : not found @@ -1305,20 +1282,19 @@ key_delsp(struct secpolicy *sp) static struct secpolicy * key_getsp(struct secpolicyindex *spidx) { + SPTREE_RLOCK_TRACKER; struct secpolicy *sp; IPSEC_ASSERT(spidx != NULL, ("null spidx")); - SPTREE_LOCK(); - LIST_FOREACH(sp, &V_sptree[spidx->dir], chain) { - if (sp->state == IPSEC_SPSTATE_DEAD) - continue; + SPTREE_RLOCK(); + TAILQ_FOREACH(sp, &V_sptree[spidx->dir], chain) { if (key_cmpspidx_exactly(spidx, &sp->spidx)) { SP_ADDREF(sp); break; } } - SPTREE_UNLOCK(); + SPTREE_RUNLOCK(); return sp; } @@ -1326,73 +1302,58 @@ key_getsp(struct secpolicyindex *spidx) /* * get SP by index. * OUT: NULL : not found - * others : found, pointer to a SP. + * others : found, pointer to referenced SP. 
*/ static struct secpolicy * -key_getspbyid(u_int32_t id) +key_getspbyid(uint32_t id) { + SPTREE_RLOCK_TRACKER; struct secpolicy *sp; - SPTREE_LOCK(); - LIST_FOREACH(sp, &V_sptree[IPSEC_DIR_INBOUND], chain) { - if (sp->state == IPSEC_SPSTATE_DEAD) - continue; - if (sp->id == id) { - SP_ADDREF(sp); - goto done; - } - } - - LIST_FOREACH(sp, &V_sptree[IPSEC_DIR_OUTBOUND], chain) { - if (sp->state == IPSEC_SPSTATE_DEAD) - continue; + SPTREE_RLOCK(); + LIST_FOREACH(sp, SPHASH_HASH(id), idhash) { if (sp->id == id) { SP_ADDREF(sp); - goto done; + break; } } -done: - SPTREE_UNLOCK(); - - return sp; + SPTREE_RUNLOCK(); + return (sp); } struct secpolicy * -key_newsp(const char* where, int tag) +key_newsp(void) { - struct secpolicy *newsp = NULL; + struct secpolicy *sp; - newsp = (struct secpolicy *) - malloc(sizeof(struct secpolicy), M_IPSEC_SP, M_NOWAIT|M_ZERO); - if (newsp) { - SECPOLICY_LOCK_INIT(newsp); - newsp->refcnt = 1; - newsp->req = NULL; - } + sp = malloc(sizeof(*sp), M_IPSEC_SP, M_NOWAIT | M_ZERO); + if (sp != NULL) + SP_INITREF(sp); + return (sp); +} + +struct ipsecrequest * +ipsec_newisr(void) +{ - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP %s from %s:%u return SP:%p\n", __func__, - where, tag, newsp)); - return newsp; + return (malloc(sizeof(struct ipsecrequest), M_IPSEC_SR, + M_NOWAIT | M_ZERO)); } -static void -_key_delsp(struct secpolicy *sp) +void +ipsec_delisr(struct ipsecrequest *p) { - SECPOLICY_LOCK_DESTROY(sp); - free(sp, M_IPSEC_SP); + + free(p, M_IPSEC_SR); } /* * create secpolicy structure from sadb_x_policy structure. - * NOTE: `state', `secpolicyindex' in secpolicy structure are not set, - * so must be set properly later. + * NOTE: `state', `secpolicyindex' and 'id' in secpolicy structure + * are not set, so must be set properly later. 
*/ struct secpolicy * -key_msg2sp(xpl0, len, error) - struct sadb_x_policy *xpl0; - size_t len; - int *error; +key_msg2sp(struct sadb_x_policy *xpl0, size_t len, int *error) { struct secpolicy *newsp; @@ -1405,13 +1366,15 @@ key_msg2sp(xpl0, len, error) return NULL; } - if ((newsp = KEY_NEWSP()) == NULL) { + if ((newsp = key_newsp()) == NULL) { *error = ENOBUFS; return NULL; } newsp->spidx.dir = xpl0->sadb_x_policy_dir; newsp->policy = xpl0->sadb_x_policy_type; + newsp->priority = xpl0->sadb_x_policy_priority; + newsp->tcount = 0; /* check policy */ switch (xpl0->sadb_x_policy_type) { @@ -1419,20 +1382,19 @@ key_msg2sp(xpl0, len, error) case IPSEC_POLICY_NONE: case IPSEC_POLICY_ENTRUST: case IPSEC_POLICY_BYPASS: - newsp->req = NULL; break; case IPSEC_POLICY_IPSEC: { - int tlen; struct sadb_x_ipsecrequest *xisr; - struct ipsecrequest **p_isr = &newsp->req; + struct ipsecrequest *isr; + int tlen; /* validity check */ if (PFKEY_EXTLEN(xpl0) < sizeof(*xpl0)) { ipseclog((LOG_DEBUG, "%s: Invalid msg length.\n", __func__)); - KEY_FREESP(&newsp); + key_freesp(&newsp); *error = EINVAL; return NULL; } @@ -1445,22 +1407,33 @@ key_msg2sp(xpl0, len, error) if (xisr->sadb_x_ipsecrequest_len < sizeof(*xisr)) { ipseclog((LOG_DEBUG, "%s: invalid ipsecrequest " "length.\n", __func__)); - KEY_FREESP(&newsp); + key_freesp(&newsp); *error = EINVAL; return NULL; } + if (newsp->tcount >= IPSEC_MAXREQ) { + ipseclog((LOG_DEBUG, + "%s: too many ipsecrequests.\n", + __func__)); + key_freesp(&newsp); + *error = EINVAL; + return (NULL); + } + /* allocate request buffer */ /* NB: data structure is zero'd */ - *p_isr = ipsec_newisr(); - if ((*p_isr) == NULL) { + isr = ipsec_newisr(); + if (isr == NULL) { ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); - KEY_FREESP(&newsp); + key_freesp(&newsp); *error = ENOBUFS; return NULL; } + newsp->req[newsp->tcount++] = isr; + /* set values */ switch (xisr->sadb_x_ipsecrequest_proto) { case IPPROTO_ESP: @@ -1471,11 +1444,12 @@ key_msg2sp(xpl0, 
len, error) ipseclog((LOG_DEBUG, "%s: invalid proto type=%u\n", __func__, xisr->sadb_x_ipsecrequest_proto)); - KEY_FREESP(&newsp); + key_freesp(&newsp); *error = EPROTONOSUPPORT; return NULL; } - (*p_isr)->saidx.proto = xisr->sadb_x_ipsecrequest_proto; + isr->saidx.proto = + (uint8_t)xisr->sadb_x_ipsecrequest_proto; switch (xisr->sadb_x_ipsecrequest_mode) { case IPSEC_MODE_TRANSPORT: @@ -1486,11 +1460,11 @@ key_msg2sp(xpl0, len, error) ipseclog((LOG_DEBUG, "%s: invalid mode=%u\n", __func__, xisr->sadb_x_ipsecrequest_mode)); - KEY_FREESP(&newsp); + key_freesp(&newsp); *error = EINVAL; return NULL; } - (*p_isr)->saidx.mode = xisr->sadb_x_ipsecrequest_mode; + isr->saidx.mode = xisr->sadb_x_ipsecrequest_mode; switch (xisr->sadb_x_ipsecrequest_level) { case IPSEC_LEVEL_DEFAULT: @@ -1517,16 +1491,16 @@ key_msg2sp(xpl0, len, error) if (xisr->sadb_x_ipsecrequest_reqid == 0) { u_int32_t reqid; if ((reqid = key_newreqid()) == 0) { - KEY_FREESP(&newsp); + key_freesp(&newsp); *error = ENOBUFS; return NULL; } - (*p_isr)->saidx.reqid = reqid; + isr->saidx.reqid = reqid; xisr->sadb_x_ipsecrequest_reqid = reqid; } else { /* set it for manual keying. 
*/ - (*p_isr)->saidx.reqid = - xisr->sadb_x_ipsecrequest_reqid; + isr->saidx.reqid = + xisr->sadb_x_ipsecrequest_reqid; } break; @@ -1534,59 +1508,72 @@ key_msg2sp(xpl0, len, error) ipseclog((LOG_DEBUG, "%s: invalid level=%u\n", __func__, xisr->sadb_x_ipsecrequest_level)); - KEY_FREESP(&newsp); + key_freesp(&newsp); *error = EINVAL; return NULL; } - (*p_isr)->level = xisr->sadb_x_ipsecrequest_level; + isr->level = xisr->sadb_x_ipsecrequest_level; /* set IP addresses if there */ if (xisr->sadb_x_ipsecrequest_len > sizeof(*xisr)) { struct sockaddr *paddr; paddr = (struct sockaddr *)(xisr + 1); - /* validity check */ if (paddr->sa_len - > sizeof((*p_isr)->saidx.src)) { + > sizeof(isr->saidx.src)) { ipseclog((LOG_DEBUG, "%s: invalid " "request address length.\n", __func__)); - KEY_FREESP(&newsp); + key_freesp(&newsp); *error = EINVAL; return NULL; } - bcopy(paddr, &(*p_isr)->saidx.src, - paddr->sa_len); - - paddr = (struct sockaddr *)((caddr_t)paddr - + paddr->sa_len); + bcopy(paddr, &isr->saidx.src, paddr->sa_len); + paddr = (struct sockaddr *)((caddr_t)paddr + + paddr->sa_len); /* validity check */ if (paddr->sa_len - > sizeof((*p_isr)->saidx.dst)) { + > sizeof(isr->saidx.dst)) { ipseclog((LOG_DEBUG, "%s: invalid " "request address length.\n", __func__)); - KEY_FREESP(&newsp); + key_freesp(&newsp); *error = EINVAL; return NULL; } - bcopy(paddr, &(*p_isr)->saidx.dst, - paddr->sa_len); + /* AF family should match */ + if (paddr->sa_family != + isr->saidx.src.sa.sa_family) { + ipseclog((LOG_DEBUG, "%s: address " + "family doesn't match.\n", + __func__)); + key_freesp(&newsp); + *error = EINVAL; + return (NULL); + } + bcopy(paddr, &isr->saidx.dst, paddr->sa_len); + } else { + /* + * Addresses for TUNNEL mode requests are + * mandatory. 
+ */ + if (isr->saidx.mode == IPSEC_MODE_TUNNEL) { + ipseclog((LOG_DEBUG, "%s: missing " + "request addresses.\n", __func__)); + key_freesp(&newsp); + *error = EINVAL; + return (NULL); + } } - - (*p_isr)->sp = newsp; - - /* initialization for the next. */ - p_isr = &(*p_isr)->next; tlen -= xisr->sadb_x_ipsecrequest_len; /* validity check */ if (tlen < 0) { ipseclog((LOG_DEBUG, "%s: becoming tlen < 0.\n", __func__)); - KEY_FREESP(&newsp); + key_freesp(&newsp); *error = EINVAL; return NULL; } @@ -1594,76 +1581,114 @@ key_msg2sp(xpl0, len, error) xisr = (struct sadb_x_ipsecrequest *)((caddr_t)xisr + xisr->sadb_x_ipsecrequest_len); } + /* XXXAE: LARVAL SP */ + if (newsp->tcount < 1) { + ipseclog((LOG_DEBUG, "%s: valid IPSEC transforms " + "not found.\n", __func__)); + key_freesp(&newsp); + *error = EINVAL; + return (NULL); + } } break; default: ipseclog((LOG_DEBUG, "%s: invalid policy type.\n", __func__)); - KEY_FREESP(&newsp); + key_freesp(&newsp); *error = EINVAL; return NULL; } *error = 0; - return newsp; + return (newsp); } -static u_int32_t -key_newreqid() +uint32_t +key_newreqid(void) { - static u_int32_t auto_reqid = IPSEC_MANUAL_REQID_MAX + 1; + static uint32_t auto_reqid = IPSEC_MANUAL_REQID_MAX + 1; - auto_reqid = (auto_reqid == ~0 - ? IPSEC_MANUAL_REQID_MAX + 1 : auto_reqid + 1); + if (auto_reqid == ~0) + auto_reqid = IPSEC_MANUAL_REQID_MAX + 1; + else + auto_reqid++; /* XXX should be unique check */ - - return auto_reqid; + return (auto_reqid); } /* * copy secpolicy struct to sadb_x_policy structure indicated. 
*/ -struct mbuf * -key_sp2msg(sp) - struct secpolicy *sp; +static struct mbuf * +key_sp2mbuf(struct secpolicy *sp) { - struct sadb_x_policy *xpl; - int tlen; - caddr_t p; struct mbuf *m; - - IPSEC_ASSERT(sp != NULL, ("null policy")); + size_t tlen; tlen = key_getspreqmsglen(sp); - - m = key_alloc_mbuf(tlen); - if (!m || m->m_next) { /*XXX*/ - if (m) - m_freem(m); - return NULL; + m = m_get2(tlen, M_NOWAIT, MT_DATA, 0); + if (m == NULL) + return (NULL); + m_align(m, tlen); + m->m_len = tlen; + if (key_sp2msg(sp, m->m_data, &tlen) != 0) { + m_freem(m); + return (NULL); } + return (m); +} - m->m_len = tlen; - m->m_next = NULL; - xpl = mtod(m, struct sadb_x_policy *); - bzero(xpl, tlen); +int +key_sp2msg(struct secpolicy *sp, void *request, size_t *len) +{ + struct sadb_x_ipsecrequest *xisr; + struct sadb_x_policy *xpl; + struct ipsecrequest *isr; + size_t xlen, ilen; + caddr_t p; + int error, i; - xpl->sadb_x_policy_len = PFKEY_UNIT64(tlen); + IPSEC_ASSERT(sp != NULL, ("null policy")); + + xlen = sizeof(*xpl); + if (*len < xlen) + return (EINVAL); + + error = 0; + bzero(request, *len); + xpl = (struct sadb_x_policy *)request; xpl->sadb_x_policy_exttype = SADB_X_EXT_POLICY; xpl->sadb_x_policy_type = sp->policy; xpl->sadb_x_policy_dir = sp->spidx.dir; xpl->sadb_x_policy_id = sp->id; - p = (caddr_t)xpl + sizeof(*xpl); + xpl->sadb_x_policy_priority = sp->priority; + switch (sp->state) { + case IPSEC_SPSTATE_IFNET: + xpl->sadb_x_policy_scope = IPSEC_POLICYSCOPE_IFNET; + break; + case IPSEC_SPSTATE_PCB: + xpl->sadb_x_policy_scope = IPSEC_POLICYSCOPE_PCB; + break; + default: + xpl->sadb_x_policy_scope = IPSEC_POLICYSCOPE_GLOBAL; + } /* if is the policy for ipsec ? 
*/ if (sp->policy == IPSEC_POLICY_IPSEC) { - struct sadb_x_ipsecrequest *xisr; - struct ipsecrequest *isr; - - for (isr = sp->req; isr != NULL; isr = isr->next) { - + p = (caddr_t)xpl + sizeof(*xpl); + for (i = 0; i < sp->tcount; i++) { + isr = sp->req[i]; + ilen = PFKEY_ALIGN8(sizeof(*xisr) + + isr->saidx.src.sa.sa_len + + isr->saidx.dst.sa.sa_len); + xlen += ilen; + if (xlen > *len) { + error = ENOBUFS; + /* Calculate needed size */ + continue; + } xisr = (struct sadb_x_ipsecrequest *)p; - + xisr->sadb_x_ipsecrequest_len = ilen; xisr->sadb_x_ipsecrequest_proto = isr->saidx.proto; xisr->sadb_x_ipsecrequest_mode = isr->saidx.mode; xisr->sadb_x_ipsecrequest_level = isr->level; @@ -1673,31 +1698,21 @@ key_sp2msg(sp) bcopy(&isr->saidx.src, p, isr->saidx.src.sa.sa_len); p += isr->saidx.src.sa.sa_len; bcopy(&isr->saidx.dst, p, isr->saidx.dst.sa.sa_len); - p += isr->saidx.src.sa.sa_len; - - xisr->sadb_x_ipsecrequest_len = - PFKEY_ALIGN8(sizeof(*xisr) - + isr->saidx.src.sa.sa_len - + isr->saidx.dst.sa.sa_len); + p += isr->saidx.dst.sa.sa_len; } } - - return m; + xpl->sadb_x_policy_len = PFKEY_UNIT64(xlen); + if (error == 0) + *len = xlen; + else + *len = sizeof(*xpl); + return (error); } /* m will not be freed nor modified */ static struct mbuf * -#ifdef __STDC__ key_gather_mbuf(struct mbuf *m, const struct sadb_msghdr *mhp, - int ndeep, int nitem, ...) -#else -key_gather_mbuf(m, mhp, ndeep, nitem, va_alist) - struct mbuf *m; - const struct sadb_msghdr *mhp; - int ndeep; - int nitem; - va_dcl -#endif + int ndeep, int nitem, ...) 
{ va_list ap; int idx; @@ -1725,7 +1740,7 @@ key_gather_mbuf(m, mhp, ndeep, nitem, va_alist) IPSEC_ASSERT(len <= MHLEN, ("header too big %u", len)); - MGETHDR(n, M_DONTWAIT, MT_DATA); + MGETHDR(n, M_NOWAIT, MT_DATA); if (!n) goto fail; n->m_len = len; @@ -1734,17 +1749,16 @@ key_gather_mbuf(m, mhp, ndeep, nitem, va_alist) mtod(n, caddr_t)); } else if (i < ndeep) { len = mhp->extlen[idx]; - n = key_alloc_mbuf(len); - if (!n || n->m_next) { /*XXX*/ - if (n) - m_freem(n); + n = m_get2(len, M_NOWAIT, MT_DATA, 0); + if (n == NULL) goto fail; - } + m_align(n, len); + n->m_len = len; m_copydata(m, mhp->extoff[idx], mhp->extlen[idx], mtod(n, caddr_t)); } else { n = m_copym(m, mhp->extoff[idx], mhp->extlen[idx], - M_DONTWAIT); + M_NOWAIT); } if (n == NULL) goto fail; @@ -1784,18 +1798,16 @@ fail: * SPDSETIDX like SPDADD without a part of policy requests. * SPDUPDATE replace a unique policy entry. * + * XXXAE: serialize this in PF_KEY to avoid races. * m will always be freed. */ static int -key_spdadd(so, m, mhp) - struct socket *so; - struct mbuf *m; - const struct sadb_msghdr *mhp; +key_spdadd(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { + struct secpolicyindex spidx; struct sadb_address *src0, *dst0; struct sadb_x_policy *xpl0, *xpl; struct sadb_lifetime *lft = NULL; - struct secpolicyindex spidx; struct secpolicy *newsp; int error; @@ -1804,24 +1816,26 @@ key_spdadd(so, m, mhp) IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); - if (mhp->ext[SADB_EXT_ADDRESS_SRC] == NULL || - mhp->ext[SADB_EXT_ADDRESS_DST] == NULL || - mhp->ext[SADB_X_EXT_POLICY] == NULL) { - ipseclog((LOG_DEBUG, "key_spdadd: invalid message is passed.\n")); + if (SADB_CHECKHDR(mhp, SADB_EXT_ADDRESS_SRC) || + SADB_CHECKHDR(mhp, SADB_EXT_ADDRESS_DST) || + SADB_CHECKHDR(mhp, SADB_X_EXT_POLICY)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: missing required header.\n", + __func__)); return key_senderror(so, m, EINVAL); } - if 
(mhp->extlen[SADB_EXT_ADDRESS_SRC] < sizeof(struct sadb_address) || - mhp->extlen[SADB_EXT_ADDRESS_DST] < sizeof(struct sadb_address) || - mhp->extlen[SADB_X_EXT_POLICY] < sizeof(struct sadb_x_policy)) { - ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", - __func__)); + if (SADB_CHECKLEN(mhp, SADB_EXT_ADDRESS_SRC) || + SADB_CHECKLEN(mhp, SADB_EXT_ADDRESS_DST) || + SADB_CHECKLEN(mhp, SADB_X_EXT_POLICY)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: wrong header size.\n", __func__)); return key_senderror(so, m, EINVAL); } - if (mhp->ext[SADB_EXT_LIFETIME_HARD] != NULL) { - if (mhp->extlen[SADB_EXT_LIFETIME_HARD] - < sizeof(struct sadb_lifetime)) { - ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", - __func__)); + if (!SADB_CHECKHDR(mhp, SADB_EXT_LIFETIME_HARD)) { + if (SADB_CHECKLEN(mhp, SADB_EXT_LIFETIME_HARD)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: wrong header size.\n", + __func__)); return key_senderror(so, m, EINVAL); } lft = (struct sadb_lifetime *)mhp->ext[SADB_EXT_LIFETIME_HARD]; @@ -1831,141 +1845,93 @@ key_spdadd(so, m, mhp) dst0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_DST]; xpl0 = (struct sadb_x_policy *)mhp->ext[SADB_X_EXT_POLICY]; - /* - * Note: do not parse SADB_X_EXT_NAT_T_* here: - * we are processing traffic endpoints. - */ - - /* make secindex */ - /* XXX boundary check against sa_len */ - KEY_SETSECSPIDX(xpl0->sadb_x_policy_dir, - src0 + 1, - dst0 + 1, - src0->sadb_address_prefixlen, - dst0->sadb_address_prefixlen, - src0->sadb_address_proto, - &spidx); - - /* checking the direciton. */ + /* check the direciton */ switch (xpl0->sadb_x_policy_dir) { case IPSEC_DIR_INBOUND: case IPSEC_DIR_OUTBOUND: break; default: - ipseclog((LOG_DEBUG, "%s: Invalid SP direction.\n", __func__)); - mhp->msg->sadb_msg_errno = EINVAL; - return 0; + ipseclog((LOG_DEBUG, "%s: invalid SP direction.\n", __func__)); + return key_senderror(so, m, EINVAL); } - - /* check policy */ /* key_spdadd() accepts DISCARD, NONE and IPSEC. 
*/ - if (xpl0->sadb_x_policy_type == IPSEC_POLICY_ENTRUST - || xpl0->sadb_x_policy_type == IPSEC_POLICY_BYPASS) { - ipseclog((LOG_DEBUG, "%s: Invalid policy type.\n", __func__)); + if (xpl0->sadb_x_policy_type != IPSEC_POLICY_DISCARD && + xpl0->sadb_x_policy_type != IPSEC_POLICY_NONE && + xpl0->sadb_x_policy_type != IPSEC_POLICY_IPSEC) { + ipseclog((LOG_DEBUG, "%s: invalid policy type.\n", __func__)); return key_senderror(so, m, EINVAL); } /* policy requests are mandatory when action is ipsec. */ - if (mhp->msg->sadb_msg_type != SADB_X_SPDSETIDX - && xpl0->sadb_x_policy_type == IPSEC_POLICY_IPSEC - && mhp->extlen[SADB_X_EXT_POLICY] <= sizeof(*xpl0)) { - ipseclog((LOG_DEBUG, "%s: some policy requests part required\n", - __func__)); + if (xpl0->sadb_x_policy_type == IPSEC_POLICY_IPSEC && + mhp->extlen[SADB_X_EXT_POLICY] <= sizeof(*xpl0)) { + ipseclog((LOG_DEBUG, + "%s: policy requests required.\n", __func__)); return key_senderror(so, m, EINVAL); } - /* - * checking there is SP already or not. - * SPDUPDATE doesn't depend on whether there is a SP or not. - * If the type is either SPDADD or SPDSETIDX AND a SP is found, - * then error. - */ + error = key_checksockaddrs((struct sockaddr *)(src0 + 1), + (struct sockaddr *)(dst0 + 1)); + if (error != 0 || + src0->sadb_address_proto != dst0->sadb_address_proto) { + ipseclog((LOG_DEBUG, "%s: invalid sockaddr.\n", __func__)); + return key_senderror(so, m, error); + } + /* make secindex */ + KEY_SETSECSPIDX(xpl0->sadb_x_policy_dir, + src0 + 1, + dst0 + 1, + src0->sadb_address_prefixlen, + dst0->sadb_address_prefixlen, + src0->sadb_address_proto, + &spidx); + /* Checking there is SP already or not. 
*/ newsp = key_getsp(&spidx); - if (mhp->msg->sadb_msg_type == SADB_X_SPDUPDATE) { - if (newsp) { - SPTREE_LOCK(); - newsp->state = IPSEC_SPSTATE_DEAD; - SPTREE_UNLOCK(); - KEY_FREESP(&newsp); - } - } else { - if (newsp != NULL) { - KEY_FREESP(&newsp); - ipseclog((LOG_DEBUG, "%s: a SP entry exists already.\n", - __func__)); - return key_senderror(so, m, EEXIST); + if (newsp != NULL) { + if (mhp->msg->sadb_msg_type == SADB_X_SPDUPDATE) { + KEYDBG(KEY_STAMP, + printf("%s: unlink SP(%p) for SPDUPDATE\n", + __func__, newsp)); + KEYDBG(KEY_DATA, kdebug_secpolicy(newsp)); + key_unlink(newsp); + key_freesp(&newsp); + } else { + key_freesp(&newsp); + ipseclog((LOG_DEBUG, "%s: a SP entry exists already.", + __func__)); + return (key_senderror(so, m, EEXIST)); } } - /* allocation new SP entry */ + /* allocate new SP entry */ if ((newsp = key_msg2sp(xpl0, PFKEY_EXTLEN(xpl0), &error)) == NULL) { return key_senderror(so, m, error); } + newsp->lastused = newsp->created = time_second; + newsp->lifetime = lft ? lft->sadb_lifetime_addtime : 0; + newsp->validtime = lft ? 
lft->sadb_lifetime_usetime : 0; + bcopy(&spidx, &newsp->spidx, sizeof(spidx)); + + /* XXXAE: there is race between key_getsp() and key_insertsp() */ + SPTREE_WLOCK(); if ((newsp->id = key_getnewspid()) == 0) { - _key_delsp(newsp); + SPTREE_WUNLOCK(); + key_freesp(&newsp); return key_senderror(so, m, ENOBUFS); } + key_insertsp(newsp); + SPTREE_WUNLOCK(); - /* XXX boundary check against sa_len */ - KEY_SETSECSPIDX(xpl0->sadb_x_policy_dir, - src0 + 1, - dst0 + 1, - src0->sadb_address_prefixlen, - dst0->sadb_address_prefixlen, - src0->sadb_address_proto, - &newsp->spidx); - - /* sanity check on addr pair */ - if (((struct sockaddr *)(src0 + 1))->sa_family != - ((struct sockaddr *)(dst0+ 1))->sa_family) { - _key_delsp(newsp); - return key_senderror(so, m, EINVAL); - } - if (((struct sockaddr *)(src0 + 1))->sa_len != - ((struct sockaddr *)(dst0+ 1))->sa_len) { - _key_delsp(newsp); - return key_senderror(so, m, EINVAL); - } -#if 1 - if (newsp->req && newsp->req->saidx.src.sa.sa_family && newsp->req->saidx.dst.sa.sa_family) { - if (newsp->req->saidx.src.sa.sa_family != newsp->req->saidx.dst.sa.sa_family) { - _key_delsp(newsp); - return key_senderror(so, m, EINVAL); - } - } -#endif - - newsp->created = time_second; - newsp->lastused = newsp->created; - newsp->lifetime = lft ? lft->sadb_lifetime_addtime : 0; - newsp->validtime = lft ? lft->sadb_lifetime_usetime : 0; - - newsp->refcnt = 1; /* do not reclaim until I say I do */ - newsp->state = IPSEC_SPSTATE_ALIVE; - LIST_INSERT_TAIL(&V_sptree[newsp->spidx.dir], newsp, secpolicy, chain); - - /* delete the entry in spacqtree */ - if (mhp->msg->sadb_msg_type == SADB_X_SPDUPDATE) { - struct secspacq *spacq = key_getspacq(&spidx); - if (spacq != NULL) { - /* reset counter in order to deletion by timehandler. 
*/ - spacq->created = time_second; - spacq->count = 0; - SPACQ_UNLOCK(); - } - } + KEYDBG(KEY_STAMP, + printf("%s: SP(%p)\n", __func__, newsp)); + KEYDBG(KEY_DATA, kdebug_secpolicy(newsp)); { struct mbuf *n, *mpolicy; struct sadb_msg *newmsg; int off; - /* - * Note: do not send SADB_X_EXT_NAT_T_* here: - * we are sending traffic endpoints. - */ - /* create new sadb_msg to reply. */ if (lft) { n = key_gather_mbuf(m, mhp, 2, 5, SADB_EXT_RESERVED, @@ -2013,30 +1979,32 @@ key_spdadd(so, m, mhp) * 0: failure. * others: success. */ -static u_int32_t -key_getnewspid() +static uint32_t +key_getnewspid(void) { - u_int32_t newid = 0; - int count = V_key_spi_trycnt; /* XXX */ struct secpolicy *sp; + uint32_t newid = 0; + int count = V_key_spi_trycnt; /* XXX */ - /* when requesting to allocate spi ranged */ + SPTREE_WLOCK_ASSERT(); while (count--) { - newid = (V_policy_id = (V_policy_id == ~0 ? 1 : V_policy_id + 1)); - - if ((sp = key_getspbyid(newid)) == NULL) + if (V_policy_id == ~0) /* overflowed */ + newid = V_policy_id = 1; + else + newid = ++V_policy_id; + LIST_FOREACH(sp, SPHASH_HASH(newid), idhash) { + if (sp->id == newid) + break; + } + if (sp == NULL) break; - - KEY_FREESP(&sp); } - if (count == 0 || newid == 0) { - ipseclog((LOG_DEBUG, "%s: to allocate policy id is failed.\n", - __func__)); - return 0; + ipseclog((LOG_DEBUG, "%s: failed to allocate policy id.\n", + __func__)); + return (0); } - - return newid; + return (newid); } /* @@ -2052,14 +2020,12 @@ key_getnewspid() * m will always be freed. 
*/ static int -key_spddelete(so, m, mhp) - struct socket *so; - struct mbuf *m; - const struct sadb_msghdr *mhp; +key_spddelete(struct socket *so, struct mbuf *m, + const struct sadb_msghdr *mhp) { + struct secpolicyindex spidx; struct sadb_address *src0, *dst0; struct sadb_x_policy *xpl0; - struct secpolicyindex spidx; struct secpolicy *sp; IPSEC_ASSERT(so != NULL, ("null so")); @@ -2067,18 +2033,19 @@ key_spddelete(so, m, mhp) IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); - if (mhp->ext[SADB_EXT_ADDRESS_SRC] == NULL || - mhp->ext[SADB_EXT_ADDRESS_DST] == NULL || - mhp->ext[SADB_X_EXT_POLICY] == NULL) { - ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", - __func__)); + if (SADB_CHECKHDR(mhp, SADB_EXT_ADDRESS_SRC) || + SADB_CHECKHDR(mhp, SADB_EXT_ADDRESS_DST) || + SADB_CHECKHDR(mhp, SADB_X_EXT_POLICY)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: missing required header.\n", + __func__)); return key_senderror(so, m, EINVAL); } - if (mhp->extlen[SADB_EXT_ADDRESS_SRC] < sizeof(struct sadb_address) || - mhp->extlen[SADB_EXT_ADDRESS_DST] < sizeof(struct sadb_address) || - mhp->extlen[SADB_X_EXT_POLICY] < sizeof(struct sadb_x_policy)) { - ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", - __func__)); + if (SADB_CHECKLEN(mhp, SADB_EXT_ADDRESS_SRC) || + SADB_CHECKLEN(mhp, SADB_EXT_ADDRESS_DST) || + SADB_CHECKLEN(mhp, SADB_X_EXT_POLICY)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: wrong header size.\n", __func__)); return key_senderror(so, m, EINVAL); } @@ -2086,13 +2053,29 @@ key_spddelete(so, m, mhp) dst0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_DST]; xpl0 = (struct sadb_x_policy *)mhp->ext[SADB_X_EXT_POLICY]; - /* - * Note: do not parse SADB_X_EXT_NAT_T_* here: - * we are processing traffic endpoints. 
- */ - + /* check the direciton */ + switch (xpl0->sadb_x_policy_dir) { + case IPSEC_DIR_INBOUND: + case IPSEC_DIR_OUTBOUND: + break; + default: + ipseclog((LOG_DEBUG, "%s: invalid SP direction.\n", __func__)); + return key_senderror(so, m, EINVAL); + } + /* Only DISCARD, NONE and IPSEC are allowed */ + if (xpl0->sadb_x_policy_type != IPSEC_POLICY_DISCARD && + xpl0->sadb_x_policy_type != IPSEC_POLICY_NONE && + xpl0->sadb_x_policy_type != IPSEC_POLICY_IPSEC) { + ipseclog((LOG_DEBUG, "%s: invalid policy type.\n", __func__)); + return key_senderror(so, m, EINVAL); + } + if (key_checksockaddrs((struct sockaddr *)(src0 + 1), + (struct sockaddr *)(dst0 + 1)) != 0 || + src0->sadb_address_proto != dst0->sadb_address_proto) { + ipseclog((LOG_DEBUG, "%s: invalid sockaddr.\n", __func__)); + return key_senderror(so, m, EINVAL); + } /* make secindex */ - /* XXX boundary check against sa_len */ KEY_SETSECSPIDX(xpl0->sadb_x_policy_dir, src0 + 1, dst0 + 1, @@ -2101,16 +2084,6 @@ key_spddelete(so, m, mhp) src0->sadb_address_proto, &spidx); - /* checking the direciton. */ - switch (xpl0->sadb_x_policy_dir) { - case IPSEC_DIR_INBOUND: - case IPSEC_DIR_OUTBOUND: - break; - default: - ipseclog((LOG_DEBUG, "%s: Invalid SP direction.\n", __func__)); - return key_senderror(so, m, EINVAL); - } - /* Is there SP in SPD ? */ if ((sp = key_getsp(&spidx)) == NULL) { ipseclog((LOG_DEBUG, "%s: no SP found.\n", __func__)); @@ -2120,20 +2093,16 @@ key_spddelete(so, m, mhp) /* save policy id to buffer to be returned. */ xpl0->sadb_x_policy_id = sp->id; - SPTREE_LOCK(); - sp->state = IPSEC_SPSTATE_DEAD; - SPTREE_UNLOCK(); - KEY_FREESP(&sp); + KEYDBG(KEY_STAMP, + printf("%s: SP(%p)\n", __func__, sp)); + KEYDBG(KEY_DATA, kdebug_secpolicy(sp)); + key_unlink(sp); + key_freesp(&sp); { struct mbuf *n; struct sadb_msg *newmsg; - /* - * Note: do not send SADB_X_EXT_NAT_T_* here: - * we are sending traffic endpoints. - */ - /* create new sadb_msg to reply. 
*/ n = key_gather_mbuf(m, mhp, 1, 4, SADB_EXT_RESERVED, SADB_X_EXT_POLICY, SADB_EXT_ADDRESS_SRC, SADB_EXT_ADDRESS_DST); @@ -2162,37 +2131,45 @@ key_spddelete(so, m, mhp) * m will always be freed. */ static int -key_spddelete2(so, m, mhp) - struct socket *so; - struct mbuf *m; - const struct sadb_msghdr *mhp; +key_spddelete2(struct socket *so, struct mbuf *m, + const struct sadb_msghdr *mhp) { - u_int32_t id; struct secpolicy *sp; + uint32_t id; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); - if (mhp->ext[SADB_X_EXT_POLICY] == NULL || - mhp->extlen[SADB_X_EXT_POLICY] < sizeof(struct sadb_x_policy)) { - ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", __func__)); + if (SADB_CHECKHDR(mhp, SADB_X_EXT_POLICY) || + SADB_CHECKLEN(mhp, SADB_X_EXT_POLICY)) { + ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", + __func__)); return key_senderror(so, m, EINVAL); } - id = ((struct sadb_x_policy *)mhp->ext[SADB_X_EXT_POLICY])->sadb_x_policy_id; + id = ((struct sadb_x_policy *) + mhp->ext[SADB_X_EXT_POLICY])->sadb_x_policy_id; /* Is there SP in SPD ? */ if ((sp = key_getspbyid(id)) == NULL) { - ipseclog((LOG_DEBUG, "%s: no SP found id:%u.\n", __func__, id)); + ipseclog((LOG_DEBUG, "%s: no SP found for id %u.\n", + __func__, id)); return key_senderror(so, m, EINVAL); } - SPTREE_LOCK(); - sp->state = IPSEC_SPSTATE_DEAD; - SPTREE_UNLOCK(); - KEY_FREESP(&sp); + KEYDBG(KEY_STAMP, + printf("%s: SP(%p)\n", __func__, sp)); + KEYDBG(KEY_DATA, kdebug_secpolicy(sp)); + key_unlink(sp); + if (sp->state != IPSEC_SPSTATE_DEAD) { + ipseclog((LOG_DEBUG, "%s: failed to delete SP with id %u.\n", + __func__, id)); + key_freesp(&sp); + return (key_senderror(so, m, EACCES)); + } + key_freesp(&sp); { struct mbuf *n, *nn; @@ -2202,10 +2179,9 @@ key_spddelete2(so, m, mhp) /* create new sadb_msg to reply. 
*/ len = PFKEY_ALIGN8(sizeof(struct sadb_msg)); - MGETHDR(n, M_DONTWAIT, MT_DATA); + MGETHDR(n, M_NOWAIT, MT_DATA); if (n && len > MHLEN) { - MCLGET(n, M_DONTWAIT); - if ((n->m_flags & M_EXT) == 0) { + if (!(MCLGET(n, M_NOWAIT))) { m_freem(n); n = NULL; } @@ -2224,7 +2200,7 @@ key_spddelete2(so, m, mhp) off, len)); n->m_next = m_copym(m, mhp->extoff[SADB_X_EXT_POLICY], - mhp->extlen[SADB_X_EXT_POLICY], M_DONTWAIT); + mhp->extlen[SADB_X_EXT_POLICY], M_NOWAIT); if (!n->m_next) { m_freem(n); return key_senderror(so, m, ENOBUFS); @@ -2244,7 +2220,7 @@ key_spddelete2(so, m, mhp) } /* - * SADB_X_GET processing + * SADB_X_SPDGET processing * receive * * from the user(?), @@ -2256,37 +2232,37 @@ key_spddelete2(so, m, mhp) * m will always be freed. */ static int -key_spdget(so, m, mhp) - struct socket *so; - struct mbuf *m; - const struct sadb_msghdr *mhp; +key_spdget(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { - u_int32_t id; struct secpolicy *sp; struct mbuf *n; + uint32_t id; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); - if (mhp->ext[SADB_X_EXT_POLICY] == NULL || - mhp->extlen[SADB_X_EXT_POLICY] < sizeof(struct sadb_x_policy)) { + if (SADB_CHECKHDR(mhp, SADB_X_EXT_POLICY) || + SADB_CHECKLEN(mhp, SADB_X_EXT_POLICY)) { ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", - __func__)); + __func__)); return key_senderror(so, m, EINVAL); } - id = ((struct sadb_x_policy *)mhp->ext[SADB_X_EXT_POLICY])->sadb_x_policy_id; + id = ((struct sadb_x_policy *) + mhp->ext[SADB_X_EXT_POLICY])->sadb_x_policy_id; /* Is there SP in SPD ? 
*/ if ((sp = key_getspbyid(id)) == NULL) { - ipseclog((LOG_DEBUG, "%s: no SP found id:%u.\n", __func__, id)); + ipseclog((LOG_DEBUG, "%s: no SP found for id %u.\n", + __func__, id)); return key_senderror(so, m, ENOENT); } - n = key_setdumpsp(sp, SADB_X_SPDGET, 0, mhp->msg->sadb_msg_pid); - KEY_FREESP(&sp); + n = key_setdumpsp(sp, SADB_X_SPDGET, mhp->msg->sadb_msg_seq, + mhp->msg->sadb_msg_pid); + key_freesp(&sp); if (n != NULL) { m_freem(m); return key_sendup_mbuf(so, n, KEY_SENDUP_ONE); @@ -2300,7 +2276,7 @@ key_spdget(so, m, mhp) * send * * to KMD, and expect to receive - * with SADB_X_SPDACQUIRE if error occured, + * with SADB_X_SPDACQUIRE if error occurred, * or * * with SADB_X_SPDUPDATE from KMD by PF_KEY. @@ -2310,8 +2286,7 @@ key_spdget(so, m, mhp) * others: error number */ int -key_spdacquire(sp) - struct secpolicy *sp; +key_spdacquire(struct secpolicy *sp) { struct mbuf *result = NULL, *m; struct secspacq *newspacq; @@ -2330,7 +2305,8 @@ key_spdacquire(sp) } else { /* increment counter and do nothing. */ newspacq->count++; - return 0; + SPACQ_UNLOCK(); + return (0); } SPACQ_UNLOCK(); } else { @@ -2370,13 +2346,11 @@ key_spdacquire(sp) * m will always be freed. 
*/ static int -key_spdflush(so, m, mhp) - struct socket *so; - struct mbuf *m; - const struct sadb_msghdr *mhp; +key_spdflush(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { + struct secpolicy_queue drainq; struct sadb_msg *newmsg; - struct secpolicy *sp; + struct secpolicy *sp, *nextsp; u_int dir; IPSEC_ASSERT(so != NULL, ("null socket")); @@ -2387,11 +2361,27 @@ key_spdflush(so, m, mhp) if (m->m_len != PFKEY_ALIGN8(sizeof(struct sadb_msg))) return key_senderror(so, m, EINVAL); + TAILQ_INIT(&drainq); + SPTREE_WLOCK(); for (dir = 0; dir < IPSEC_DIR_MAX; dir++) { - SPTREE_LOCK(); - LIST_FOREACH(sp, &V_sptree[dir], chain) - sp->state = IPSEC_SPSTATE_DEAD; - SPTREE_UNLOCK(); + TAILQ_CONCAT(&drainq, &V_sptree[dir], chain); + } + /* + * We need to set state to DEAD for each policy to be sure, + * that another thread won't try to unlink it. + * Also remove SP from sphash. + */ + TAILQ_FOREACH(sp, &drainq, chain) { + sp->state = IPSEC_SPSTATE_DEAD; + LIST_REMOVE(sp, idhash); + } + V_sp_genid++; + SPTREE_WUNLOCK(); + sp = TAILQ_FIRST(&drainq); + while (sp != NULL) { + nextsp = TAILQ_NEXT(sp, chain); + key_freesp(&sp); + sp = nextsp; } if (sizeof(struct sadb_msg) > m->m_len + M_TRAILINGSPACE(m)) { @@ -2410,27 +2400,34 @@ key_spdflush(so, m, mhp) return key_sendup_mbuf(so, m, KEY_SENDUP_ALL); } +static uint8_t +key_satype2scopemask(uint8_t satype) +{ + + if (satype == IPSEC_POLICYSCOPE_ANY) + return (0xff); + return (satype); +} /* * SADB_SPDDUMP processing * receive * - * from the user, and dump all SP leaves - * and send, + * from the user, and dump all SP leaves and send, * ..... * to the ikmpd. * - * m will always be freed. + * NOTE: + * sadb_msg_satype is considered as mask of policy scopes. + * m will always be freed. 
*/ static int -key_spddump(so, m, mhp) - struct socket *so; - struct mbuf *m; - const struct sadb_msghdr *mhp; +key_spddump(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { + SPTREE_RLOCK_TRACKER; struct secpolicy *sp; - int cnt; - u_int dir; struct mbuf *n; + int cnt; + u_int dir, scope; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(m != NULL, ("null mbuf")); @@ -2439,36 +2436,55 @@ key_spddump(so, m, mhp) /* search SPD entry and get buffer size. */ cnt = 0; - SPTREE_LOCK(); + scope = key_satype2scopemask(mhp->msg->sadb_msg_satype); + SPTREE_RLOCK(); for (dir = 0; dir < IPSEC_DIR_MAX; dir++) { - LIST_FOREACH(sp, &V_sptree[dir], chain) { - cnt++; + if (scope & IPSEC_POLICYSCOPE_GLOBAL) { + TAILQ_FOREACH(sp, &V_sptree[dir], chain) + cnt++; + } + if (scope & IPSEC_POLICYSCOPE_IFNET) { + TAILQ_FOREACH(sp, &V_sptree_ifnet[dir], chain) + cnt++; } } if (cnt == 0) { - SPTREE_UNLOCK(); + SPTREE_RUNLOCK(); return key_senderror(so, m, ENOENT); } for (dir = 0; dir < IPSEC_DIR_MAX; dir++) { - LIST_FOREACH(sp, &V_sptree[dir], chain) { - --cnt; - n = key_setdumpsp(sp, SADB_X_SPDDUMP, cnt, - mhp->msg->sadb_msg_pid); + if (scope & IPSEC_POLICYSCOPE_GLOBAL) { + TAILQ_FOREACH(sp, &V_sptree[dir], chain) { + --cnt; + n = key_setdumpsp(sp, SADB_X_SPDDUMP, cnt, + mhp->msg->sadb_msg_pid); + + if (n != NULL) + key_sendup_mbuf(so, n, KEY_SENDUP_ONE); + } + } + if (scope & IPSEC_POLICYSCOPE_IFNET) { + TAILQ_FOREACH(sp, &V_sptree_ifnet[dir], chain) { + --cnt; + n = key_setdumpsp(sp, SADB_X_SPDDUMP, cnt, + mhp->msg->sadb_msg_pid); - if (n) - key_sendup_mbuf(so, n, KEY_SENDUP_ONE); + if (n != NULL) + key_sendup_mbuf(so, n, KEY_SENDUP_ONE); + } } } - SPTREE_UNLOCK(); + SPTREE_RUNLOCK(); m_freem(m); - return 0; + return (0); } static struct mbuf * -key_setdumpsp(struct secpolicy *sp, u_int8_t type, u_int32_t seq, u_int32_t pid) +key_setdumpsp(struct secpolicy *sp, u_int8_t type, u_int32_t seq, + u_int32_t pid) { struct mbuf *result = NULL, *m; struct seclifetime lt; 
@@ -2478,10 +2494,6 @@ key_setdumpsp(struct secpolicy *sp, u_int8_t type, u_int32_t seq, u_int32_t pid) goto fail; result = m; - /* - * Note: do not send SADB_X_EXT_NAT_T_* here: - * we are sending traffic endpoints. - */ m = key_setsadbaddr(SADB_EXT_ADDRESS_SRC, &sp->spidx.src.sa, sp->spidx.prefs, sp->spidx.ul_proto); @@ -2496,7 +2508,7 @@ key_setdumpsp(struct secpolicy *sp, u_int8_t type, u_int32_t seq, u_int32_t pid) goto fail; m_cat(result, m); - m = key_sp2msg(sp); + m = key_sp2mbuf(sp); if (!m) goto fail; m_cat(result, m); @@ -2539,37 +2551,29 @@ fail: m_freem(result); return NULL; } - /* * get PFKEY message length for security policy and request. */ -static u_int -key_getspreqmsglen(sp) - struct secpolicy *sp; +static size_t +key_getspreqmsglen(struct secpolicy *sp) { - u_int tlen; + size_t tlen, len; + int i; tlen = sizeof(struct sadb_x_policy); - /* if is the policy for ipsec ? */ if (sp->policy != IPSEC_POLICY_IPSEC) - return tlen; + return (tlen); /* get length of ipsec requests */ - { - struct ipsecrequest *isr; - int len; - - for (isr = sp->req; isr != NULL; isr = isr->next) { + for (i = 0; i < sp->tcount; i++) { len = sizeof(struct sadb_x_ipsecrequest) - + isr->saidx.src.sa.sa_len - + isr->saidx.dst.sa.sa_len; + + sp->req[i]->saidx.src.sa.sa_len + + sp->req[i]->saidx.dst.sa.sa_len; tlen += PFKEY_ALIGN8(len); } - } - - return tlen; + return (tlen); } /* @@ -2582,18 +2586,18 @@ key_getspreqmsglen(sp) * others : error number */ static int -key_spdexpire(sp) - struct secpolicy *sp; +key_spdexpire(struct secpolicy *sp) { - struct mbuf *result = NULL, *m; - int len; - int error = -1; struct sadb_lifetime *lt; - - /* XXX: Why do we lock ? 
*/ + struct mbuf *result = NULL, *m; + int len, error = -1; IPSEC_ASSERT(sp != NULL, ("null secpolicy")); + KEYDBG(KEY_STAMP, + printf("%s: SP(%p)\n", __func__, sp)); + KEYDBG(KEY_DATA, kdebug_secpolicy(sp)); + /* set msg header */ m = key_setsadbmsg(SADB_X_SPDEXPIRE, 0, 0, 0, 0, 0); if (!m) { @@ -2604,13 +2608,13 @@ key_spdexpire(sp) /* create lifetime extension (current and hard) */ len = PFKEY_ALIGN8(sizeof(*lt)) * 2; - m = key_alloc_mbuf(len); - if (!m || m->m_next) { /*XXX*/ - if (m) - m_freem(m); + m = m_get2(len, M_NOWAIT, MT_DATA, 0); + if (m == NULL) { error = ENOBUFS; goto fail; } + m_align(m, len); + m->m_len = len; bzero(mtod(m, caddr_t), len); lt = mtod(m, struct sadb_lifetime *); lt->sadb_lifetime_len = PFKEY_UNIT64(sizeof(struct sadb_lifetime)); @@ -2628,11 +2632,6 @@ key_spdexpire(sp) lt->sadb_lifetime_usetime = sp->validtime; m_cat(result, m); - /* - * Note: do not send SADB_X_EXT_NAT_T_* here: - * we are sending traffic endpoints. - */ - /* set sadb_address for source */ m = key_setsadbaddr(SADB_EXT_ADDRESS_SRC, &sp->spidx.src.sa, @@ -2654,7 +2653,7 @@ key_spdexpire(sp) m_cat(result, m); /* set secpolicy */ - m = key_sp2msg(sp); + m = key_sp2mbuf(sp); if (!m) { error = ENOBUFS; goto fail; @@ -2691,185 +2690,220 @@ key_spdexpire(sp) /* %%% SAD management */ /* - * allocating a memory for new SA head, and copy from the values of mhp. + * allocating and initialize new SA head. * OUT: NULL : failure due to the lack of memory. * others : pointer to new SA head. 
*/ static struct secashead * -key_newsah(saidx) - struct secasindex *saidx; +key_newsah(struct secasindex *saidx) { - struct secashead *newsah; + struct secashead *sah; - IPSEC_ASSERT(saidx != NULL, ("null saidx")); + sah = malloc(sizeof(struct secashead), M_IPSEC_SAH, + M_NOWAIT | M_ZERO); + if (sah == NULL) { + PFKEYSTAT_INC(in_nomem); + return (NULL); + } + TAILQ_INIT(&sah->savtree_larval); + TAILQ_INIT(&sah->savtree_alive); + sah->saidx = *saidx; + sah->state = SADB_SASTATE_DEAD; + SAH_INITREF(sah); + + KEYDBG(KEY_STAMP, + printf("%s: SAH(%p)\n", __func__, sah)); + KEYDBG(KEY_DATA, kdebug_secash(sah, NULL)); + return (sah); +} + +static void +key_freesah(struct secashead **psah) +{ + struct secashead *sah = *psah; - newsah = malloc(sizeof(struct secashead), M_IPSEC_SAH, M_NOWAIT|M_ZERO); - if (newsah != NULL) { - int i; - for (i = 0; i < sizeof(newsah->savtree)/sizeof(newsah->savtree[0]); i++) - LIST_INIT(&newsah->savtree[i]); - newsah->saidx = *saidx; + if (SAH_DELREF(sah) == 0) + return; - /* add to saidxtree */ - newsah->state = SADB_SASTATE_MATURE; + KEYDBG(KEY_STAMP, + printf("%s: last reference to SAH(%p)\n", __func__, sah)); + KEYDBG(KEY_DATA, kdebug_secash(sah, NULL)); - SAHTREE_LOCK(); - LIST_INSERT_HEAD(&V_sahtree, newsah, chain); - SAHTREE_UNLOCK(); - } - return(newsah); + *psah = NULL; + key_delsah(sah); } -/* - * delete SA index and all SA registerd. - */ static void -key_delsah(sah) - struct secashead *sah; +key_delsah(struct secashead *sah) { - struct secasvar *sav, *nextsav; - u_int stateidx; - int zombie = 0; - IPSEC_ASSERT(sah != NULL, ("NULL sah")); - SAHTREE_LOCK_ASSERT(); - - /* searching all SA registerd in the secindex. 
*/ - for (stateidx = 0; - stateidx < _ARRAYLEN(saorder_state_any); - stateidx++) { - u_int state = saorder_state_any[stateidx]; - LIST_FOREACH_SAFE(sav, &sah->savtree[state], chain, nextsav) { - if (sav->refcnt == 0) { - /* sanity check */ - KEY_CHKSASTATE(state, sav->state, __func__); - /* - * do NOT call KEY_FREESAV here: - * it will only delete the sav if refcnt == 1, - * where we already know that refcnt == 0 - */ - key_delsav(sav); - } else { - /* give up to delete this sa */ - zombie++; - } - } - } - if (!zombie) { /* delete only if there are savs */ - /* remove from tree of SA index */ - if (__LIST_CHAINED(sah)) - LIST_REMOVE(sah, chain); - if (sah->route_cache.sa_route.ro_rt) { - RTFREE(sah->route_cache.sa_route.ro_rt); - sah->route_cache.sa_route.ro_rt = (struct rtentry *)NULL; - } - free(sah, M_IPSEC_SAH); - } + IPSEC_ASSERT(sah->state == SADB_SASTATE_DEAD, + ("Attempt to free non DEAD SAH %p", sah)); + IPSEC_ASSERT(TAILQ_EMPTY(&sah->savtree_larval), + ("Attempt to free SAH %p with LARVAL SA", sah)); + IPSEC_ASSERT(TAILQ_EMPTY(&sah->savtree_alive), + ("Attempt to free SAH %p with ALIVE SA", sah)); + + free(sah, M_IPSEC_SAH); } /* - * allocating a new SA with LARVAL state. key_add() and key_getspi() call, + * allocating a new SA for key_add() and key_getspi() call, * and copy the values of mhp into new buffer. - * When SAD message type is GETSPI: - * to set sequence number from acq_seq++, - * to set zero to SPI. - * not to call key_setsava(). + * When SAD message type is SADB_GETSPI set SA state to LARVAL. + * For SADB_ADD create and initialize SA with MATURE state. * OUT: NULL : fail * others : pointer to new secasvar. - * - * does not modify mbuf. does not free mbuf on error. 
*/ static struct secasvar * -key_newsav(m, mhp, sah, errp, where, tag) - struct mbuf *m; - const struct sadb_msghdr *mhp; - struct secashead *sah; - int *errp; - const char* where; - int tag; +key_newsav(const struct sadb_msghdr *mhp, struct secasindex *saidx, + uint32_t spi, int *errp) { - struct secasvar *newsav; - const struct sadb_sa *xsa; + struct secashead *sah; + struct secasvar *sav; + int isnew; - IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); - IPSEC_ASSERT(sah != NULL, ("null secashead")); - - newsav = malloc(sizeof(struct secasvar), M_IPSEC_SA, M_NOWAIT|M_ZERO); - if (newsav == NULL) { - ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); - *errp = ENOBUFS; - goto done; - } - - switch (mhp->msg->sadb_msg_type) { - case SADB_GETSPI: - newsav->spi = 0; + IPSEC_ASSERT(mhp->msg->sadb_msg_type == SADB_GETSPI || + mhp->msg->sadb_msg_type == SADB_ADD, ("wrong message type")); -#ifdef IPSEC_DOSEQCHECK - /* sync sequence number */ - if (mhp->msg->sadb_msg_seq == 0) - newsav->seq = - (V_acq_seq = (V_acq_seq == ~0 ? 1 : ++V_acq_seq)); - else -#endif - newsav->seq = mhp->msg->sadb_msg_seq; - break; - - case SADB_ADD: - /* sanity check */ - if (mhp->ext[SADB_EXT_SA] == NULL) { - free(newsav, M_IPSEC_SA); - newsav = NULL; - ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", - __func__)); + sav = NULL; + sah = NULL; + /* check SPI value */ + switch (saidx->proto) { + case IPPROTO_ESP: + case IPPROTO_AH: + /* + * RFC 4302, 2.4. Security Parameters Index (SPI), SPI values + * 1-255 reserved by IANA for future use, + * 0 for implementation specific, local use. 
+ */ + if (ntohl(spi) <= 255) { + ipseclog((LOG_DEBUG, "%s: illegal range of SPI %u.\n", + __func__, ntohl(spi))); *errp = EINVAL; goto done; } - xsa = (const struct sadb_sa *)mhp->ext[SADB_EXT_SA]; - newsav->spi = xsa->sadb_sa_spi; - newsav->seq = mhp->msg->sadb_msg_seq; break; - default: - free(newsav, M_IPSEC_SA); - newsav = NULL; - *errp = EINVAL; - goto done; } + sav = malloc(sizeof(struct secasvar), M_IPSEC_SA, M_NOWAIT | M_ZERO); + if (sav == NULL) { + *errp = ENOBUFS; + goto done; + } + sav->lock = malloc(sizeof(struct mtx), M_IPSEC_MISC, + M_NOWAIT | M_ZERO); + if (sav->lock == NULL) { + *errp = ENOBUFS; + goto done; + } + mtx_init(sav->lock, "ipsec association", NULL, MTX_DEF); + sav->lft_c = uma_zalloc(V_key_lft_zone, M_NOWAIT); + if (sav->lft_c == NULL) { + *errp = ENOBUFS; + goto done; + } + counter_u64_zero(sav->lft_c_allocations); + counter_u64_zero(sav->lft_c_bytes); - /* copy sav values */ - if (mhp->msg->sadb_msg_type != SADB_GETSPI) { - *errp = key_setsaval(newsav, m, mhp); - if (*errp) { - free(newsav, M_IPSEC_SA); - newsav = NULL; + sav->spi = spi; + sav->seq = mhp->msg->sadb_msg_seq; + sav->state = SADB_SASTATE_LARVAL; + sav->pid = (pid_t)mhp->msg->sadb_msg_pid; + SAV_INITREF(sav); +again: + sah = key_getsah(saidx); + if (sah == NULL) { + /* create a new SA index */ + sah = key_newsah(saidx); + if (sah == NULL) { + ipseclog((LOG_DEBUG, + "%s: No more memory.\n", __func__)); + *errp = ENOBUFS; goto done; } - } - - SECASVAR_LOCK_INIT(newsav); - - /* reset created */ - newsav->created = time_second; - newsav->pid = mhp->msg->sadb_msg_pid; + isnew = 1; + } else + isnew = 0; - /* add to satree */ - newsav->sah = sah; - sa_initref(newsav); - newsav->state = SADB_SASTATE_LARVAL; + sav->sah = sah; + if (mhp->msg->sadb_msg_type == SADB_GETSPI) { + sav->created = time_second; + } else if (sav->state == SADB_SASTATE_LARVAL) { + /* + * Do not call key_setsaval() second time in case + * of `goto again`. We will have MATURE state. 
+ */ + *errp = key_setsaval(sav, mhp); + if (*errp != 0) + goto done; + sav->state = SADB_SASTATE_MATURE; + } - SAHTREE_LOCK(); - LIST_INSERT_TAIL(&sah->savtree[SADB_SASTATE_LARVAL], newsav, - secasvar, chain); - SAHTREE_UNLOCK(); + SAHTREE_WLOCK(); + /* + * Check that existing SAH wasn't unlinked. + * Since we didn't hold the SAHTREE lock, it is possible, + * that callout handler or key_flush() or key_delete() could + * unlink this SAH. + */ + if (isnew == 0 && sah->state == SADB_SASTATE_DEAD) { + SAHTREE_WUNLOCK(); + key_freesah(&sah); /* reference from key_getsah() */ + goto again; + } + if (isnew != 0) { + /* + * Add new SAH into SADB. + * + * XXXAE: we can serialize key_add and key_getspi calls, so + * several threads will not fight in the race. + * Otherwise we should check under SAHTREE lock, that this + * SAH would not added twice. + */ + TAILQ_INSERT_HEAD(&V_sahtree, sah, chain); + /* Add new SAH into hash by addresses */ + LIST_INSERT_HEAD(SAHADDRHASH_HASH(saidx), sah, addrhash); + /* Now we are linked in the chain */ + sah->state = SADB_SASTATE_MATURE; + /* + * SAV references this new SAH. + * In case of existing SAH we reuse reference + * from key_getsah(). 
+ */ + SAH_ADDREF(sah); + } + /* Link SAV with SAH */ + if (sav->state == SADB_SASTATE_MATURE) + TAILQ_INSERT_HEAD(&sah->savtree_alive, sav, chain); + else + TAILQ_INSERT_HEAD(&sah->savtree_larval, sav, chain); + /* Add SAV into SPI hash */ + LIST_INSERT_HEAD(SAVHASH_HASH(sav->spi), sav, spihash); + SAHTREE_WUNLOCK(); + *errp = 0; /* success */ done: - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP %s from %s:%u return SP:%p\n", __func__, - where, tag, newsav)); - - return newsav; + if (*errp != 0) { + if (sav != NULL) { + if (sav->lock != NULL) { + mtx_destroy(sav->lock); + free(sav->lock, M_IPSEC_MISC); + } + if (sav->lft_c != NULL) + uma_zfree(V_key_lft_zone, sav->lft_c); + free(sav, M_IPSEC_SA), sav = NULL; + } + if (sah != NULL) + key_freesah(&sah); + if (*errp == ENOBUFS) { + ipseclog((LOG_DEBUG, "%s: No more memory.\n", + __func__)); + PFKEYSTAT_INC(in_nomem); + } + } + return (sav); } /* @@ -2878,6 +2912,13 @@ done: static void key_cleansav(struct secasvar *sav) { + + if (sav->natt != NULL) { + free(sav->natt, M_IPSEC_MISC); + sav->natt = NULL; + } + if (sav->flags & SADB_X_EXT_F_CLONED) + return; /* * Cleanup xform state. Note that zeroize'ing causes the * keys to be cleared; otherwise we must do it ourself. 
@@ -2886,7 +2927,6 @@ key_cleansav(struct secasvar *sav) sav->tdb_xform->xf_zeroize(sav); sav->tdb_xform = NULL; } else { - KASSERT(sav->iv == NULL, ("iv but no xform")); if (sav->key_auth != NULL) bzero(sav->key_auth->key_data, _KEYLEN(sav->key_auth)); if (sav->key_enc != NULL) @@ -2904,19 +2944,12 @@ key_cleansav(struct secasvar *sav) free(sav->key_enc, M_IPSEC_MISC); sav->key_enc = NULL; } - if (sav->sched) { - bzero(sav->sched, sav->schedlen); - free(sav->sched, M_IPSEC_MISC); - sav->sched = NULL; - } if (sav->replay != NULL) { + if (sav->replay->bitmap != NULL) + free(sav->replay->bitmap, M_IPSEC_MISC); free(sav->replay, M_IPSEC_MISC); sav->replay = NULL; } - if (sav->lft_c != NULL) { - free(sav->lft_c, M_IPSEC_MISC); - sav->lft_c = NULL; - } if (sav->lft_h != NULL) { free(sav->lft_h, M_IPSEC_MISC); sav->lft_h = NULL; @@ -2931,202 +2964,288 @@ key_cleansav(struct secasvar *sav) * free() SA variable entry. */ static void -key_delsav(sav) - struct secasvar *sav; +key_delsav(struct secasvar *sav) { IPSEC_ASSERT(sav != NULL, ("null sav")); - IPSEC_ASSERT(sav->refcnt == 0, ("reference count %u > 0", sav->refcnt)); + IPSEC_ASSERT(sav->state == SADB_SASTATE_DEAD, + ("attempt to free non DEAD SA %p", sav)); + IPSEC_ASSERT(sav->refcnt == 0, ("reference count %u > 0", + sav->refcnt)); - /* remove from SA header */ - if (__LIST_CHAINED(sav)) - LIST_REMOVE(sav, chain); + /* + * SA must be unlinked from the chain and hashtbl. + * If SA was cloned, we leave all fields untouched, + * except NAT-T config. + */ key_cleansav(sav); - SECASVAR_LOCK_DESTROY(sav); + if ((sav->flags & SADB_X_EXT_F_CLONED) == 0) { + mtx_destroy(sav->lock); + free(sav->lock, M_IPSEC_MISC); + uma_zfree(V_key_lft_zone, sav->lft_c); + } free(sav, M_IPSEC_SA); } /* - * search SAD. + * search SAH. * OUT: * NULL : not found - * others : found, pointer to a SA. + * others : found, referenced pointer to a SAH. 
*/ static struct secashead * -key_getsah(saidx) - struct secasindex *saidx; +key_getsah(struct secasindex *saidx) { + SAHTREE_RLOCK_TRACKER; struct secashead *sah; - SAHTREE_LOCK(); - LIST_FOREACH(sah, &V_sahtree, chain) { - if (sah->state == SADB_SASTATE_DEAD) - continue; - if (key_cmpsaidx(&sah->saidx, saidx, CMP_REQID)) - break; + SAHTREE_RLOCK(); + LIST_FOREACH(sah, SAHADDRHASH_HASH(saidx), addrhash) { + if (key_cmpsaidx(&sah->saidx, saidx, CMP_MODE_REQID) != 0) { + SAH_ADDREF(sah); + break; + } } - SAHTREE_UNLOCK(); - - return sah; + SAHTREE_RUNLOCK(); + return (sah); } /* - * check not to be duplicated SPI. - * NOTE: this function is too slow due to searching all SAD. + * Check not to be duplicated SPI. * OUT: - * NULL : not found - * others : found, pointer to a SA. + * 0 : not found + * 1 : found SA with given SPI. */ -static struct secasvar * -key_checkspidup(saidx, spi) - struct secasindex *saidx; - u_int32_t spi; +static int +key_checkspidup(uint32_t spi) { - struct secashead *sah; + SAHTREE_RLOCK_TRACKER; struct secasvar *sav; - /* check address family */ - if (saidx->src.sa.sa_family != saidx->dst.sa.sa_family) { - ipseclog((LOG_DEBUG, "%s: address family mismatched.\n", - __func__)); - return NULL; - } - - sav = NULL; - /* check all SAD */ - SAHTREE_LOCK(); - LIST_FOREACH(sah, &V_sahtree, chain) { - if (!key_ismyaddr((struct sockaddr *)&sah->saidx.dst)) - continue; - sav = key_getsavbyspi(sah, spi); - if (sav != NULL) + /* Assume SPI is in network byte order */ + SAHTREE_RLOCK(); + LIST_FOREACH(sav, SAVHASH_HASH(spi), spihash) { + if (sav->spi == spi) break; } - SAHTREE_UNLOCK(); - - return sav; + SAHTREE_RUNLOCK(); + return (sav != NULL); } /* - * search SAD litmited alive SA, protocol, SPI. + * Search SA by SPI. * OUT: * NULL : not found - * others : found, pointer to a SA. + * others : found, referenced pointer to a SA. 
*/ static struct secasvar * -key_getsavbyspi(sah, spi) - struct secashead *sah; - u_int32_t spi; +key_getsavbyspi(uint32_t spi) { + SAHTREE_RLOCK_TRACKER; struct secasvar *sav; - u_int stateidx, state; - sav = NULL; - SAHTREE_LOCK_ASSERT(); - /* search all status */ - for (stateidx = 0; - stateidx < _ARRAYLEN(saorder_state_alive); - stateidx++) { - - state = saorder_state_alive[stateidx]; - LIST_FOREACH(sav, &sah->savtree[state], chain) { - - /* sanity check */ - if (sav->state != state) { - ipseclog((LOG_DEBUG, "%s: " - "invalid sav->state (queue: %d SA: %d)\n", - __func__, state, sav->state)); - continue; - } - - if (sav->spi == spi) - return sav; - } + /* Assume SPI is in network byte order */ + SAHTREE_RLOCK(); + LIST_FOREACH(sav, SAVHASH_HASH(spi), spihash) { + if (sav->spi != spi) + continue; + SAV_ADDREF(sav); + break; } - - return NULL; + SAHTREE_RUNLOCK(); + return (sav); } -/* - * copy SA values from PF_KEY message except *SPI, SEQ, PID, STATE and TYPE*. - * You must update these if need. - * OUT: 0: success. - * !0: failure. - * - * does not modify mbuf. does not free mbuf on error. - */ static int -key_setsaval(sav, m, mhp) - struct secasvar *sav; - struct mbuf *m; - const struct sadb_msghdr *mhp; +key_updatelifetimes(struct secasvar *sav, const struct sadb_msghdr *mhp) { - int error = 0; + struct seclifetime *lft_h, *lft_s, *tmp; + + /* Lifetime extension is optional, check that it is present. */ + if (SADB_CHECKHDR(mhp, SADB_EXT_LIFETIME_HARD) && + SADB_CHECKHDR(mhp, SADB_EXT_LIFETIME_SOFT)) { + /* + * In case of SADB_UPDATE we may need to change + * existing lifetimes. 
+ */ + if (sav->state == SADB_SASTATE_MATURE) { + lft_h = lft_s = NULL; + goto reset; + } + return (0); + } + /* Both HARD and SOFT extensions must present */ + if ((SADB_CHECKHDR(mhp, SADB_EXT_LIFETIME_HARD) && + !SADB_CHECKHDR(mhp, SADB_EXT_LIFETIME_SOFT)) || + (SADB_CHECKHDR(mhp, SADB_EXT_LIFETIME_SOFT) && + !SADB_CHECKHDR(mhp, SADB_EXT_LIFETIME_HARD))) { + ipseclog((LOG_DEBUG, + "%s: invalid message: missing required header.\n", + __func__)); + return (EINVAL); + } + if (SADB_CHECKLEN(mhp, SADB_EXT_LIFETIME_HARD) || + SADB_CHECKLEN(mhp, SADB_EXT_LIFETIME_SOFT)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: wrong header size.\n", __func__)); + return (EINVAL); + } + lft_h = key_dup_lifemsg((const struct sadb_lifetime *) + mhp->ext[SADB_EXT_LIFETIME_HARD], M_IPSEC_MISC); + if (lft_h == NULL) { + PFKEYSTAT_INC(in_nomem); + ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); + return (ENOBUFS); + } + lft_s = key_dup_lifemsg((const struct sadb_lifetime *) + mhp->ext[SADB_EXT_LIFETIME_SOFT], M_IPSEC_MISC); + if (lft_s == NULL) { + PFKEYSTAT_INC(in_nomem); + free(lft_h, M_IPSEC_MISC); + ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); + return (ENOBUFS); + } +reset: + if (sav->state != SADB_SASTATE_LARVAL) { + /* + * key_update() holds reference to this SA, + * so it won't be deleted in meanwhile. + */ + SECASVAR_LOCK(sav); + tmp = sav->lft_h; + sav->lft_h = lft_h; + lft_h = tmp; + + tmp = sav->lft_s; + sav->lft_s = lft_s; + lft_s = tmp; + SECASVAR_UNLOCK(sav); + if (lft_h != NULL) + free(lft_h, M_IPSEC_MISC); + if (lft_s != NULL) + free(lft_s, M_IPSEC_MISC); + return (0); + } + /* We can update lifetime without holding a lock */ + IPSEC_ASSERT(sav->lft_h == NULL, ("lft_h is already initialized\n")); + IPSEC_ASSERT(sav->lft_s == NULL, ("lft_s is already initialized\n")); + sav->lft_h = lft_h; + sav->lft_s = lft_s; + return (0); +} + +/* + * copy SA values from PF_KEY message except *SPI, SEQ, PID and TYPE*. + * You must update these if need. 
Expects only LARVAL SAs. + * OUT: 0: success. + * !0: failure. + */ +static int +key_setsaval(struct secasvar *sav, const struct sadb_msghdr *mhp) +{ + const struct sadb_sa *sa0; + const struct sadb_key *key0; + uint32_t replay; + size_t len; + int error; - IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); + IPSEC_ASSERT(sav->state == SADB_SASTATE_LARVAL, + ("Attempt to update non LARVAL SA")); - /* initialization */ - sav->replay = NULL; - sav->key_auth = NULL; - sav->key_enc = NULL; - sav->sched = NULL; - sav->schedlen = 0; - sav->iv = NULL; - sav->lft_c = NULL; - sav->lft_h = NULL; - sav->lft_s = NULL; - sav->tdb_xform = NULL; /* transform */ - sav->tdb_encalgxform = NULL; /* encoding algorithm */ - sav->tdb_authalgxform = NULL; /* authentication algorithm */ - sav->tdb_compalgxform = NULL; /* compression algorithm */ - /* Initialize even if NAT-T not compiled in: */ - sav->natt_type = 0; - sav->natt_esp_frag_len = 0; + /* XXX rewrite */ + error = key_setident(sav->sah, mhp); + if (error != 0) + goto fail; /* SA */ - if (mhp->ext[SADB_EXT_SA] != NULL) { - const struct sadb_sa *sa0; - - sa0 = (const struct sadb_sa *)mhp->ext[SADB_EXT_SA]; - if (mhp->extlen[SADB_EXT_SA] < sizeof(*sa0)) { + if (!SADB_CHECKHDR(mhp, SADB_EXT_SA)) { + if (SADB_CHECKLEN(mhp, SADB_EXT_SA)) { error = EINVAL; goto fail; } - + sa0 = (const struct sadb_sa *)mhp->ext[SADB_EXT_SA]; sav->alg_auth = sa0->sadb_sa_auth; sav->alg_enc = sa0->sadb_sa_encrypt; sav->flags = sa0->sadb_sa_flags; + if ((sav->flags & SADB_KEY_FLAGS_MAX) != sav->flags) { + ipseclog((LOG_DEBUG, + "%s: invalid sa_flags 0x%08x.\n", __func__, + sav->flags)); + error = EINVAL; + goto fail; + } + + /* Optional replay window */ + replay = 0; + if ((sa0->sadb_sa_flags & SADB_X_EXT_OLD) == 0) + replay = sa0->sadb_sa_replay; + if (!SADB_CHECKHDR(mhp, SADB_X_EXT_SA_REPLAY)) { + if (SADB_CHECKLEN(mhp, SADB_X_EXT_SA_REPLAY)) { + error = EINVAL; + goto fail; 
+ } + replay = ((const struct sadb_x_sa_replay *) + mhp->ext[SADB_X_EXT_SA_REPLAY])->sadb_x_sa_replay_replay; + + if (replay > UINT32_MAX - 32) { + ipseclog((LOG_DEBUG, + "%s: replay window too big.\n", __func__)); + error = EINVAL; + goto fail; + } + + replay = (replay + 7) >> 3; + } + + sav->replay = malloc(sizeof(struct secreplay), M_IPSEC_MISC, + M_NOWAIT | M_ZERO); + if (sav->replay == NULL) { + PFKEYSTAT_INC(in_nomem); + ipseclog((LOG_DEBUG, "%s: No more memory.\n", + __func__)); + error = ENOBUFS; + goto fail; + } + + if (replay != 0) { + /* number of 32b blocks to be allocated */ + uint32_t bitmap_size; - /* replay window */ - if ((sa0->sadb_sa_flags & SADB_X_EXT_OLD) == 0) { - sav->replay = (struct secreplay *) - malloc(sizeof(struct secreplay)+sa0->sadb_sa_replay, M_IPSEC_MISC, M_NOWAIT|M_ZERO); - if (sav->replay == NULL) { + /* RFC 6479: + * - the allocated replay window size must be + * a power of two. + * - use an extra 32b block as a redundant window. + */ + bitmap_size = 1; + while (replay + 4 > bitmap_size) + bitmap_size <<= 1; + bitmap_size = bitmap_size / 4; + + sav->replay->bitmap = malloc( + bitmap_size * sizeof(uint32_t), M_IPSEC_MISC, + M_NOWAIT | M_ZERO); + if (sav->replay->bitmap == NULL) { + PFKEYSTAT_INC(in_nomem); ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); error = ENOBUFS; goto fail; } - if (sa0->sadb_sa_replay != 0) - sav->replay->bitmap = (caddr_t)(sav->replay+1); - sav->replay->wsize = sa0->sadb_sa_replay; + sav->replay->bitmap_size = bitmap_size; + sav->replay->wsize = replay; } } /* Authentication keys */ - if (mhp->ext[SADB_EXT_KEY_AUTH] != NULL) { - const struct sadb_key *key0; - int len; - - key0 = (const struct sadb_key *)mhp->ext[SADB_EXT_KEY_AUTH]; - len = mhp->extlen[SADB_EXT_KEY_AUTH]; - - error = 0; - if (len < sizeof(*key0)) { + if (!SADB_CHECKHDR(mhp, SADB_EXT_KEY_AUTH)) { + if (SADB_CHECKLEN(mhp, SADB_EXT_KEY_AUTH)) { error = EINVAL; goto fail; } + error = 0; + key0 = (const struct sadb_key 
*)mhp->ext[SADB_EXT_KEY_AUTH]; + len = mhp->extlen[SADB_EXT_KEY_AUTH]; switch (mhp->msg->sadb_msg_satype) { case SADB_SATYPE_AH: case SADB_SATYPE_ESP: @@ -3146,29 +3265,25 @@ key_setsaval(sav, m, mhp) goto fail; } - sav->key_auth = (struct seckey *)key_dup_keymsg(key0, len, - M_IPSEC_MISC); + sav->key_auth = key_dup_keymsg(key0, len, M_IPSEC_MISC); if (sav->key_auth == NULL ) { ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); + PFKEYSTAT_INC(in_nomem); error = ENOBUFS; goto fail; } } /* Encryption key */ - if (mhp->ext[SADB_EXT_KEY_ENCRYPT] != NULL) { - const struct sadb_key *key0; - int len; - - key0 = (const struct sadb_key *)mhp->ext[SADB_EXT_KEY_ENCRYPT]; - len = mhp->extlen[SADB_EXT_KEY_ENCRYPT]; - - error = 0; - if (len < sizeof(*key0)) { + if (!SADB_CHECKHDR(mhp, SADB_EXT_KEY_ENCRYPT)) { + if (SADB_CHECKLEN(mhp, SADB_EXT_KEY_ENCRYPT)) { error = EINVAL; goto fail; } + error = 0; + key0 = (const struct sadb_key *)mhp->ext[SADB_EXT_KEY_ENCRYPT]; + len = mhp->extlen[SADB_EXT_KEY_ENCRYPT]; switch (mhp->msg->sadb_msg_satype) { case SADB_SATYPE_ESP: if (len == PFKEY_ALIGN8(sizeof(struct sadb_key)) && @@ -3176,12 +3291,11 @@ key_setsaval(sav, m, mhp) error = EINVAL; break; } - sav->key_enc = (struct seckey *)key_dup_keymsg(key0, - len, - M_IPSEC_MISC); + sav->key_enc = key_dup_keymsg(key0, len, M_IPSEC_MISC); if (sav->key_enc == NULL) { ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); + PFKEYSTAT_INC(in_nomem); error = ENOBUFS; goto fail; } @@ -3206,172 +3320,83 @@ key_setsaval(sav, m, mhp) /* set iv */ sav->ivlen = 0; - switch (mhp->msg->sadb_msg_satype) { case SADB_SATYPE_AH: - error = xform_init(sav, XF_AH); - break; - case SADB_SATYPE_ESP: - error = xform_init(sav, XF_ESP); - break; - case SADB_X_SATYPE_IPCOMP: - error = xform_init(sav, XF_IPCOMP); - break; - case SADB_X_SATYPE_TCPSIGNATURE: - error = xform_init(sav, XF_TCPSIGNATURE); - break; - } - if (error) { - ipseclog((LOG_DEBUG, "%s: unable to initialize SA type %u.\n", - __func__, 
mhp->msg->sadb_msg_satype)); - goto fail; - } - - /* reset created */ - sav->created = time_second; - - /* make lifetime for CURRENT */ - sav->lft_c = malloc(sizeof(struct seclifetime), M_IPSEC_MISC, M_NOWAIT); - if (sav->lft_c == NULL) { - ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); - error = ENOBUFS; - goto fail; - } - - sav->lft_c->allocations = 0; - sav->lft_c->bytes = 0; - sav->lft_c->addtime = time_second; - sav->lft_c->usetime = 0; - - /* lifetimes for HARD and SOFT */ - { - const struct sadb_lifetime *lft0; - - lft0 = (struct sadb_lifetime *)mhp->ext[SADB_EXT_LIFETIME_HARD]; - if (lft0 != NULL) { - if (mhp->extlen[SADB_EXT_LIFETIME_HARD] < sizeof(*lft0)) { + if (sav->flags & SADB_X_EXT_DERIV) { + ipseclog((LOG_DEBUG, "%s: invalid flag (derived) " + "given to AH SA.\n", __func__)); error = EINVAL; goto fail; } - sav->lft_h = key_dup_lifemsg(lft0, M_IPSEC_MISC); - if (sav->lft_h == NULL) { - ipseclog((LOG_DEBUG, "%s: No more memory.\n",__func__)); - error = ENOBUFS; - goto fail; - } - /* to be initialize ? */ - } - - lft0 = (struct sadb_lifetime *)mhp->ext[SADB_EXT_LIFETIME_SOFT]; - if (lft0 != NULL) { - if (mhp->extlen[SADB_EXT_LIFETIME_SOFT] < sizeof(*lft0)) { + if (sav->alg_enc != SADB_EALG_NONE) { + ipseclog((LOG_DEBUG, "%s: protocol and algorithm " + "mismated.\n", __func__)); error = EINVAL; goto fail; } - sav->lft_s = key_dup_lifemsg(lft0, M_IPSEC_MISC); - if (sav->lft_s == NULL) { - ipseclog((LOG_DEBUG, "%s: No more memory.\n",__func__)); - error = ENOBUFS; - goto fail; - } - /* to be initialize ? */ - } - } - - return 0; - - fail: - /* initialization */ - key_cleansav(sav); - - return error; -} - -/* - * validation with a secasvar entry, and set SADB_SATYPE_MATURE. - * OUT: 0: valid - * other: errno - */ -static int -key_mature(struct secasvar *sav) -{ - int error; - - /* check SPI value */ - switch (sav->sah->saidx.proto) { - case IPPROTO_ESP: - case IPPROTO_AH: - /* - * RFC 4302, 2.4. 
Security Parameters Index (SPI), SPI values - * 1-255 reserved by IANA for future use, - * 0 for implementation specific, local use. - */ - if (ntohl(sav->spi) <= 255) { - ipseclog((LOG_DEBUG, "%s: illegal range of SPI %u.\n", - __func__, (u_int32_t)ntohl(sav->spi))); - return EINVAL; - } + error = xform_init(sav, XF_AH); break; - } - - /* check satype */ - switch (sav->sah->saidx.proto) { - case IPPROTO_ESP: - /* check flags */ - if ((sav->flags & (SADB_X_EXT_OLD|SADB_X_EXT_DERIV)) == - (SADB_X_EXT_OLD|SADB_X_EXT_DERIV)) { + case SADB_SATYPE_ESP: + if ((sav->flags & (SADB_X_EXT_OLD | SADB_X_EXT_DERIV)) == + (SADB_X_EXT_OLD | SADB_X_EXT_DERIV)) { ipseclog((LOG_DEBUG, "%s: invalid flag (derived) " - "given to old-esp.\n", __func__)); - return EINVAL; + "given to old-esp.\n", __func__)); + error = EINVAL; + goto fail; } error = xform_init(sav, XF_ESP); break; - case IPPROTO_AH: - /* check flags */ - if (sav->flags & SADB_X_EXT_DERIV) { - ipseclog((LOG_DEBUG, "%s: invalid flag (derived) " - "given to AH SA.\n", __func__)); - return EINVAL; - } - if (sav->alg_enc != SADB_EALG_NONE) { - ipseclog((LOG_DEBUG, "%s: protocol and algorithm " - "mismated.\n", __func__)); - return(EINVAL); - } - error = xform_init(sav, XF_AH); - break; - case IPPROTO_IPCOMP: + case SADB_X_SATYPE_IPCOMP: if (sav->alg_auth != SADB_AALG_NONE) { ipseclog((LOG_DEBUG, "%s: protocol and algorithm " - "mismated.\n", __func__)); - return(EINVAL); + "mismated.\n", __func__)); + error = EINVAL; + goto fail; } - if ((sav->flags & SADB_X_EXT_RAWCPI) == 0 - && ntohl(sav->spi) >= 0x10000) { + if ((sav->flags & SADB_X_EXT_RAWCPI) == 0 && + ntohl(sav->spi) >= 0x10000) { ipseclog((LOG_DEBUG, "%s: invalid cpi for IPComp.\n", - __func__)); - return(EINVAL); + __func__)); + error = EINVAL; + goto fail; } error = xform_init(sav, XF_IPCOMP); break; - case IPPROTO_TCP: + case SADB_X_SATYPE_TCPSIGNATURE: if (sav->alg_enc != SADB_EALG_NONE) { ipseclog((LOG_DEBUG, "%s: protocol and algorithm " - "mismated.\n", 
__func__)); - return(EINVAL); + "mismated.\n", __func__)); + error = EINVAL; + goto fail; } error = xform_init(sav, XF_TCPSIGNATURE); break; default: ipseclog((LOG_DEBUG, "%s: Invalid satype.\n", __func__)); error = EPROTONOSUPPORT; - break; + goto fail; } - if (error == 0) { - SAHTREE_LOCK(); - key_sa_chgstate(sav, SADB_SASTATE_MATURE); - SAHTREE_UNLOCK(); + if (error) { + ipseclog((LOG_DEBUG, "%s: unable to initialize SA type %u.\n", + __func__, mhp->msg->sadb_msg_satype)); + goto fail; } + + /* Handle NAT-T headers */ + error = key_setnatt(sav, mhp); + if (error != 0) + goto fail; + + /* Initialize lifetime for CURRENT */ + sav->firstused = 0; + sav->created = time_second; + + /* lifetimes for HARD and SOFT */ + error = key_updatelifetimes(sav, mhp); + if (error == 0) + return (0); +fail: + key_cleansav(sav); return (error); } @@ -3379,32 +3404,32 @@ key_mature(struct secasvar *sav) * subroutine for SADB_GET and SADB_DUMP. */ static struct mbuf * -key_setdumpsa(struct secasvar *sav, u_int8_t type, u_int8_t satype, - u_int32_t seq, u_int32_t pid) +key_setdumpsa(struct secasvar *sav, uint8_t type, uint8_t satype, + uint32_t seq, uint32_t pid) { + struct seclifetime lft_c; struct mbuf *result = NULL, *tres = NULL, *m; - int i; - int dumporder[] = { - SADB_EXT_SA, SADB_X_EXT_SA2, + int i, dumporder[] = { + SADB_EXT_SA, SADB_X_EXT_SA2, SADB_X_EXT_SA_REPLAY, SADB_EXT_LIFETIME_HARD, SADB_EXT_LIFETIME_SOFT, SADB_EXT_LIFETIME_CURRENT, SADB_EXT_ADDRESS_SRC, - SADB_EXT_ADDRESS_DST, SADB_EXT_ADDRESS_PROXY, SADB_EXT_KEY_AUTH, - SADB_EXT_KEY_ENCRYPT, SADB_EXT_IDENTITY_SRC, - SADB_EXT_IDENTITY_DST, SADB_EXT_SENSITIVITY, -#ifdef IPSEC_NAT_T + SADB_EXT_ADDRESS_DST, SADB_EXT_ADDRESS_PROXY, + SADB_EXT_KEY_AUTH, SADB_EXT_KEY_ENCRYPT, + SADB_EXT_IDENTITY_SRC, SADB_EXT_IDENTITY_DST, + SADB_EXT_SENSITIVITY, SADB_X_EXT_NAT_T_TYPE, SADB_X_EXT_NAT_T_SPORT, SADB_X_EXT_NAT_T_DPORT, SADB_X_EXT_NAT_T_OAI, SADB_X_EXT_NAT_T_OAR, SADB_X_EXT_NAT_T_FRAG, -#endif }; + uint32_t replay_count; m = 
key_setsadbmsg(type, 0, satype, seq, pid, sav->refcnt); if (m == NULL) goto fail; result = m; - for (i = sizeof(dumporder)/sizeof(dumporder[0]) - 1; i >= 0; i--) { + for (i = nitems(dumporder) - 1; i >= 0; i--) { m = NULL; switch (dumporder[i]) { case SADB_EXT_SA: @@ -3414,13 +3439,25 @@ key_setdumpsa(struct secasvar *sav, u_int8_t type, u_int8_t satype, break; case SADB_X_EXT_SA2: - m = key_setsadbxsa2(sav->sah->saidx.mode, - sav->replay ? sav->replay->count : 0, + SECASVAR_LOCK(sav); + replay_count = sav->replay ? sav->replay->count : 0; + SECASVAR_UNLOCK(sav); + m = key_setsadbxsa2(sav->sah->saidx.mode, replay_count, sav->sah->saidx.reqid); if (!m) goto fail; break; + case SADB_X_EXT_SA_REPLAY: + if (sav->replay == NULL || + sav->replay->wsize <= UINT8_MAX) + continue; + + m = key_setsadbxsareplay(sav->replay->wsize); + if (!m) + goto fail; + break; + case SADB_EXT_ADDRESS_SRC: m = key_setsadbaddr(SADB_EXT_ADDRESS_SRC, &sav->sah->saidx.src.sa, @@ -3454,10 +3491,12 @@ key_setdumpsa(struct secasvar *sav, u_int8_t type, u_int8_t satype, break; case SADB_EXT_LIFETIME_CURRENT: - if (!sav->lft_c) - continue; - m = key_setlifetime(sav->lft_c, - SADB_EXT_LIFETIME_CURRENT); + lft_c.addtime = sav->created; + lft_c.allocations = (uint32_t)counter_u64_fetch( + sav->lft_c_allocations); + lft_c.bytes = counter_u64_fetch(sav->lft_c_bytes); + lft_c.usetime = sav->firstused; + m = key_setlifetime(&lft_c, SADB_EXT_LIFETIME_CURRENT); if (!m) goto fail; break; @@ -3481,35 +3520,53 @@ key_setdumpsa(struct secasvar *sav, u_int8_t type, u_int8_t satype, goto fail; break; -#ifdef IPSEC_NAT_T case SADB_X_EXT_NAT_T_TYPE: - m = key_setsadbxtype(sav->natt_type); + if (sav->natt == NULL) + continue; + m = key_setsadbxtype(UDP_ENCAP_ESPINUDP); if (!m) goto fail; break; - + case SADB_X_EXT_NAT_T_DPORT: - m = key_setsadbxport( - KEY_PORTFROMSADDR(&sav->sah->saidx.dst), + if (sav->natt == NULL) + continue; + m = key_setsadbxport(sav->natt->dport, SADB_X_EXT_NAT_T_DPORT); if (!m) goto fail; 
break; case SADB_X_EXT_NAT_T_SPORT: - m = key_setsadbxport( - KEY_PORTFROMSADDR(&sav->sah->saidx.src), + if (sav->natt == NULL) + continue; + m = key_setsadbxport(sav->natt->sport, SADB_X_EXT_NAT_T_SPORT); if (!m) goto fail; break; case SADB_X_EXT_NAT_T_OAI: + if (sav->natt == NULL || + (sav->natt->flags & IPSEC_NATT_F_OAI) == 0) + continue; + m = key_setsadbaddr(SADB_X_EXT_NAT_T_OAI, + &sav->natt->oai.sa, FULLMASK, IPSEC_ULPROTO_ANY); + if (!m) + goto fail; + break; case SADB_X_EXT_NAT_T_OAR: + if (sav->natt == NULL || + (sav->natt->flags & IPSEC_NATT_F_OAR) == 0) + continue; + m = key_setsadbaddr(SADB_X_EXT_NAT_T_OAR, + &sav->natt->oar.sa, FULLMASK, IPSEC_ULPROTO_ANY); + if (!m) + goto fail; + break; case SADB_X_EXT_NAT_T_FRAG: /* We do not (yet) support those. */ continue; -#endif case SADB_EXT_ADDRESS_PROXY: case SADB_EXT_IDENTITY_SRC: @@ -3525,10 +3582,10 @@ key_setdumpsa(struct secasvar *sav, u_int8_t type, u_int8_t satype, if (tres) m_cat(m, tres); tres = m; - } m_cat(result, tres); + tres = NULL; if (result->m_len < sizeof(struct sadb_msg)) { result = m_pullup(result, sizeof(struct sadb_msg)); if (result == NULL) @@ -3564,10 +3621,9 @@ key_setsadbmsg(u_int8_t type, u_int16_t tlen, u_int8_t satype, u_int32_t seq, len = PFKEY_ALIGN8(sizeof(struct sadb_msg)); if (len > MCLBYTES) return NULL; - MGETHDR(m, M_DONTWAIT, MT_DATA); + MGETHDR(m, M_NOWAIT, MT_DATA); if (m && len > MHLEN) { - MCLGET(m, M_DONTWAIT); - if ((m->m_flags & M_EXT) == 0) { + if (!(MCLGET(m, M_NOWAIT))) { m_freem(m); m = NULL; } @@ -3596,41 +3652,39 @@ key_setsadbmsg(u_int8_t type, u_int16_t tlen, u_int8_t satype, u_int32_t seq, * copy secasvar data into sadb_address. 
*/ static struct mbuf * -key_setsadbsa(sav) - struct secasvar *sav; +key_setsadbsa(struct secasvar *sav) { struct mbuf *m; struct sadb_sa *p; int len; len = PFKEY_ALIGN8(sizeof(struct sadb_sa)); - m = key_alloc_mbuf(len); - if (!m || m->m_next) { /*XXX*/ - if (m) - m_freem(m); - return NULL; - } - + m = m_get2(len, M_NOWAIT, MT_DATA, 0); + if (m == NULL) + return (NULL); + m_align(m, len); + m->m_len = len; p = mtod(m, struct sadb_sa *); - bzero(p, len); p->sadb_sa_len = PFKEY_UNIT64(len); p->sadb_sa_exttype = SADB_EXT_SA; p->sadb_sa_spi = sav->spi; - p->sadb_sa_replay = (sav->replay != NULL ? sav->replay->wsize : 0); + p->sadb_sa_replay = sav->replay ? + (sav->replay->wsize > UINT8_MAX ? UINT8_MAX : + sav->replay->wsize): 0; p->sadb_sa_state = sav->state; p->sadb_sa_auth = sav->alg_auth; p->sadb_sa_encrypt = sav->alg_enc; - p->sadb_sa_flags = sav->flags; - - return m; + p->sadb_sa_flags = sav->flags & SADB_KEY_FLAGS_MAX; + return (m); } /* * set data into sadb_address. */ static struct mbuf * -key_setsadbaddr(u_int16_t exttype, const struct sockaddr *saddr, u_int8_t prefixlen, u_int16_t ul_proto) +key_setsadbaddr(u_int16_t exttype, const struct sockaddr *saddr, + u_int8_t prefixlen, u_int16_t ul_proto) { struct mbuf *m; struct sadb_address *p; @@ -3638,13 +3692,11 @@ key_setsadbaddr(u_int16_t exttype, const struct sockaddr *saddr, u_int8_t prefix len = PFKEY_ALIGN8(sizeof(struct sadb_address)) + PFKEY_ALIGN8(saddr->sa_len); - m = key_alloc_mbuf(len); - if (!m || m->m_next) { /*XXX*/ - if (m) - m_freem(m); - return NULL; - } - + m = m_get2(len, M_NOWAIT, MT_DATA, 0); + if (m == NULL) + return (NULL); + m_align(m, len); + m->m_len = len; p = mtod(m, struct sadb_address *); bzero(p, len); @@ -3684,13 +3736,11 @@ key_setsadbxsa2(u_int8_t mode, u_int32_t seq, u_int32_t reqid) size_t len; len = PFKEY_ALIGN8(sizeof(struct sadb_x_sa2)); - m = key_alloc_mbuf(len); - if (!m || m->m_next) { /*XXX*/ - if (m) - m_freem(m); - return NULL; - } - + m = m_get2(len, M_NOWAIT, 
MT_DATA, 0); + if (m == NULL) + return (NULL); + m_align(m, len); + m->m_len = len; p = mtod(m, struct sadb_x_sa2 *); bzero(p, len); @@ -3705,7 +3755,32 @@ key_setsadbxsa2(u_int8_t mode, u_int32_t seq, u_int32_t reqid) return m; } -#ifdef IPSEC_NAT_T +/* + * Set data into sadb_x_sa_replay. + */ +static struct mbuf * +key_setsadbxsareplay(u_int32_t replay) +{ + struct mbuf *m; + struct sadb_x_sa_replay *p; + size_t len; + + len = PFKEY_ALIGN8(sizeof(struct sadb_x_sa_replay)); + m = m_get2(len, M_NOWAIT, MT_DATA, 0); + if (m == NULL) + return (NULL); + m_align(m, len); + m->m_len = len; + p = mtod(m, struct sadb_x_sa_replay *); + + bzero(p, len); + p->sadb_x_sa_replay_len = PFKEY_UNIT64(len); + p->sadb_x_sa_replay_exttype = SADB_X_EXT_SA_REPLAY; + p->sadb_x_sa_replay_replay = (replay << 3); + + return m; +} + /* * Set a type in sadb_x_nat_t_type. */ @@ -3718,13 +3793,11 @@ key_setsadbxtype(u_int16_t type) len = PFKEY_ALIGN8(sizeof(struct sadb_x_nat_t_type)); - m = key_alloc_mbuf(len); - if (!m || m->m_next) { /*XXX*/ - if (m) - m_freem(m); + m = m_get2(len, M_NOWAIT, MT_DATA, 0); + if (m == NULL) return (NULL); - } - + m_align(m, len); + m->m_len = len; p = mtod(m, struct sadb_x_nat_t_type *); bzero(p, len); @@ -3747,13 +3820,11 @@ key_setsadbxport(u_int16_t port, u_int16_t type) len = PFKEY_ALIGN8(sizeof(struct sadb_x_nat_t_port)); - m = key_alloc_mbuf(len); - if (!m || m->m_next) { /*XXX*/ - if (m) - m_freem(m); + m = m_get2(len, M_NOWAIT, MT_DATA, 0); + if (m == NULL) return (NULL); - } - + m_align(m, len); + m->m_len = len; p = mtod(m, struct sadb_x_nat_t_port *); bzero(p, len); @@ -3764,10 +3835,10 @@ key_setsadbxport(u_int16_t port, u_int16_t type) return (m); } -/* +/* * Get port from sockaddr. Port is in network byte order. 
*/ -u_int16_t +uint16_t key_portfromsaddr(struct sockaddr *sa) { @@ -3781,18 +3852,14 @@ key_portfromsaddr(struct sockaddr *sa) return ((struct sockaddr_in6 *)sa)->sin6_port; #endif } - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP %s unexpected address family %d\n", - __func__, sa->sa_family)); return (0); } -#endif /* IPSEC_NAT_T */ /* * Set port in struct sockaddr. Port is in network byte order. */ -static void -key_porttosaddr(struct sockaddr *sa, u_int16_t port) +void +key_porttosaddr(struct sockaddr *sa, uint16_t port) { switch (sa->sa_family) { @@ -3817,21 +3884,19 @@ key_porttosaddr(struct sockaddr *sa, u_int16_t port) * set data into sadb_x_policy */ static struct mbuf * -key_setsadbxpolicy(u_int16_t type, u_int8_t dir, u_int32_t id) +key_setsadbxpolicy(u_int16_t type, u_int8_t dir, u_int32_t id, u_int32_t priority) { struct mbuf *m; struct sadb_x_policy *p; size_t len; len = PFKEY_ALIGN8(sizeof(struct sadb_x_policy)); - m = key_alloc_mbuf(len); - if (!m || m->m_next) { /*XXX*/ - if (m) - m_freem(m); - return NULL; - } - - p = mtod(m, struct sadb_x_policy *); + m = m_get2(len, M_NOWAIT, MT_DATA, 0); + if (m == NULL) + return (NULL); + m_align(m, len); + m->m_len = len; + p = mtod(m, struct sadb_x_policy *); bzero(p, len); p->sadb_x_policy_len = PFKEY_UNIT64(len); @@ -3839,6 +3904,7 @@ key_setsadbxpolicy(u_int16_t type, u_int8_t dir, u_int32_t id) p->sadb_x_policy_type = type; p->sadb_x_policy_dir = dir; p->sadb_x_policy_id = id; + p->sadb_x_policy_priority = priority; return m; } @@ -3851,29 +3917,29 @@ key_setsadbxpolicy(u_int16_t type, u_int8_t dir, u_int32_t id) * OUT: NULL no more memory */ struct seckey * -key_dup_keymsg(const struct sadb_key *src, u_int len, - struct malloc_type *type) +key_dup_keymsg(const struct sadb_key *src, size_t len, + struct malloc_type *type) { struct seckey *dst; - dst = (struct seckey *)malloc(sizeof(struct seckey), type, M_NOWAIT); + + dst = malloc(sizeof(*dst), type, M_NOWAIT); if (dst != NULL) { dst->bits = 
src->sadb_key_bits; - dst->key_data = (char *)malloc(len, type, M_NOWAIT); + dst->key_data = malloc(len, type, M_NOWAIT); if (dst->key_data != NULL) { - bcopy((const char *)src + sizeof(struct sadb_key), - dst->key_data, len); + bcopy((const char *)(src + 1), dst->key_data, len); } else { - ipseclog((LOG_DEBUG, "%s: No more memory.\n", - __func__)); + ipseclog((LOG_DEBUG, "%s: No more memory.\n", + __func__)); free(dst, type); dst = NULL; } } else { - ipseclog((LOG_DEBUG, "%s: No more memory.\n", - __func__)); + ipseclog((LOG_DEBUG, "%s: No more memory.\n", + __func__)); } - return dst; + return (dst); } /* Take a lifetime message (sadb_lifetime) passed in on a socket and @@ -3884,118 +3950,21 @@ key_dup_keymsg(const struct sadb_key *src, u_int len, */ static struct seclifetime * -key_dup_lifemsg(const struct sadb_lifetime *src, - struct malloc_type *type) +key_dup_lifemsg(const struct sadb_lifetime *src, struct malloc_type *type) { - struct seclifetime *dst = NULL; + struct seclifetime *dst; - dst = (struct seclifetime *)malloc(sizeof(struct seclifetime), - type, M_NOWAIT); + dst = malloc(sizeof(*dst), type, M_NOWAIT); if (dst == NULL) { - /* XXX counter */ ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); - } else { - dst->allocations = src->sadb_lifetime_allocations; - dst->bytes = src->sadb_lifetime_bytes; - dst->addtime = src->sadb_lifetime_addtime; - dst->usetime = src->sadb_lifetime_usetime; - } - return dst; -} - -/* compare my own address - * OUT: 1: true, i.e. my address. 
- * 0: false - */ -int -key_ismyaddr(sa) - struct sockaddr *sa; -{ -#ifdef INET - struct sockaddr_in *sin; - struct in_ifaddr *ia; -#endif - - IPSEC_ASSERT(sa != NULL, ("null sockaddr")); - - switch (sa->sa_family) { -#ifdef INET - case AF_INET: - sin = (struct sockaddr_in *)sa; - IN_IFADDR_RLOCK(); - for (ia = V_in_ifaddrhead.tqh_first; ia; - ia = ia->ia_link.tqe_next) - { - if (sin->sin_family == ia->ia_addr.sin_family && - sin->sin_len == ia->ia_addr.sin_len && - sin->sin_addr.s_addr == ia->ia_addr.sin_addr.s_addr) - { - IN_IFADDR_RUNLOCK(); - return 1; - } - } - IN_IFADDR_RUNLOCK(); - break; -#endif -#ifdef INET6 - case AF_INET6: - return key_ismyaddr6((struct sockaddr_in6 *)sa); -#endif - } - - return 0; -} - -#ifdef INET6 -/* - * compare my own address for IPv6. - * 1: ours - * 0: other - * NOTE: derived ip6_input() in KAME. This is necessary to modify more. - */ -#include - -static int -key_ismyaddr6(sin6) - struct sockaddr_in6 *sin6; -{ - struct in6_ifaddr *ia; -#if 0 - struct in6_multi *in6m; -#endif - - IN6_IFADDR_RLOCK(); - TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) { - if (key_sockaddrcmp((struct sockaddr *)&sin6, - (struct sockaddr *)&ia->ia_addr, 0) == 0) { - IN6_IFADDR_RUNLOCK(); - return 1; - } - -#if 0 - /* - * XXX Multicast - * XXX why do we care about multlicast here while we don't care - * about IPv4 multicast?? - * XXX scope - */ - in6m = NULL; - IN6_LOOKUP_MULTI(sin6->sin6_addr, ia->ia_ifp, in6m); - if (in6m) { - IN6_IFADDR_RUNLOCK(); - return 1; - } -#endif + return (NULL); } - IN6_IFADDR_RUNLOCK(); - - /* loopback, just for safety */ - if (IN6_IS_ADDR_LOOPBACK(&sin6->sin6_addr)) - return 1; - - return 0; + dst->allocations = src->sadb_lifetime_allocations; + dst->bytes = src->sadb_lifetime_bytes; + dst->addtime = src->sadb_lifetime_addtime; + dst->usetime = src->sadb_lifetime_usetime; + return (dst); } -#endif /*INET6*/ /* * compare two secasindex structure. 
@@ -4010,12 +3979,9 @@ key_ismyaddr6(sin6) * 0 : not equal */ static int -key_cmpsaidx( - const struct secasindex *saidx0, - const struct secasindex *saidx1, - int flag) +key_cmpsaidx(const struct secasindex *saidx0, const struct secasindex *saidx1, + int flag) { - int chkport = 0; /* sanity */ if (saidx0 == NULL && saidx1 == NULL) @@ -4032,19 +3998,21 @@ key_cmpsaidx( return 0; if (saidx0->reqid != saidx1->reqid) return 0; - if (bcmp(&saidx0->src, &saidx1->src, saidx0->src.sa.sa_len) != 0 || - bcmp(&saidx0->dst, &saidx1->dst, saidx0->dst.sa.sa_len) != 0) + if (bcmp(&saidx0->src, &saidx1->src, + saidx0->src.sa.sa_len) != 0 || + bcmp(&saidx0->dst, &saidx1->dst, + saidx0->dst.sa.sa_len) != 0) return 0; } else { /* CMP_MODE_REQID, CMP_REQID, CMP_HEAD */ - if (flag == CMP_MODE_REQID - ||flag == CMP_REQID) { + if (flag == CMP_MODE_REQID || flag == CMP_REQID) { /* * If reqid of SPD is non-zero, unique SA is required. * The result must be of same reqid in this case. */ - if (saidx1->reqid != 0 && saidx0->reqid != saidx1->reqid) + if (saidx1->reqid != 0 && + saidx0->reqid != saidx1->reqid) return 0; } @@ -4054,27 +4022,10 @@ key_cmpsaidx( return 0; } -#ifdef IPSEC_NAT_T - /* - * If NAT-T is enabled, check ports for tunnel mode. - * Do not check ports if they are set to zero in the SPD. - * Also do not do it for transport mode, as there is no - * port information available in the SP. 
- */ - if (saidx1->mode == IPSEC_MODE_TUNNEL && - saidx1->src.sa.sa_family == AF_INET && - saidx1->dst.sa.sa_family == AF_INET && - ((const struct sockaddr_in *)(&saidx1->src))->sin_port && - ((const struct sockaddr_in *)(&saidx1->dst))->sin_port) - chkport = 1; -#endif /* IPSEC_NAT_T */ - - if (key_sockaddrcmp(&saidx0->src.sa, &saidx1->src.sa, chkport) != 0) { + if (key_sockaddrcmp(&saidx0->src.sa, &saidx1->src.sa, 0) != 0) return 0; - } - if (key_sockaddrcmp(&saidx0->dst.sa, &saidx1->dst.sa, chkport) != 0) { + if (key_sockaddrcmp(&saidx0->dst.sa, &saidx1->dst.sa, 0) != 0) return 0; - } } return 1; @@ -4090,9 +4041,8 @@ key_cmpsaidx( * 0 : not equal */ static int -key_cmpspidx_exactly( - struct secpolicyindex *spidx0, - struct secpolicyindex *spidx1) +key_cmpspidx_exactly(struct secpolicyindex *spidx0, + struct secpolicyindex *spidx1) { /* sanity */ if (spidx0 == NULL && spidx1 == NULL) @@ -4120,9 +4070,8 @@ key_cmpspidx_exactly( * 0 : not equal */ static int -key_cmpspidx_withmask( - struct secpolicyindex *spidx0, - struct secpolicyindex *spidx1) +key_cmpspidx_withmask(struct secpolicyindex *spidx0, + struct secpolicyindex *spidx1) { /* sanity */ if (spidx0 == NULL && spidx1 == NULL) @@ -4211,13 +4160,6 @@ key_cmpspidx_withmask( return 1; } -/* returns 0 on match */ -static int -key_sockaddrcmp( - const struct sockaddr *sa1, - const struct sockaddr *sa2, - int port) -{ #ifdef satosin #undef satosin #endif @@ -4226,10 +4168,16 @@ key_sockaddrcmp( #undef satosin6 #endif #define satosin6(s) ((const struct sockaddr_in6 *)s) +/* returns 0 on match */ +int +key_sockaddrcmp(const struct sockaddr *sa1, const struct sockaddr *sa2, + int port) +{ if (sa1->sa_family != sa2->sa_family || sa1->sa_len != sa2->sa_len) return 1; switch (sa1->sa_family) { +#ifdef INET case AF_INET: if (sa1->sa_len != sizeof(struct sockaddr_in)) return 1; @@ -4240,6 +4188,8 @@ key_sockaddrcmp( if (port && satosin(sa1)->sin_port != satosin(sa2)->sin_port) return 1; break; +#endif +#ifdef INET6 case 
AF_INET6: if (sa1->sa_len != sizeof(struct sockaddr_in6)) return 1; /*EINVAL*/ @@ -4256,6 +4206,7 @@ key_sockaddrcmp( return 1; } break; +#endif default: if (bcmp(sa1, sa2, sa1->sa_len) != 0) return 1; @@ -4263,9 +4214,35 @@ key_sockaddrcmp( } return 0; +} + +/* returns 0 on match */ +int +key_sockaddrcmp_withmask(const struct sockaddr *sa1, + const struct sockaddr *sa2, size_t mask) +{ + if (sa1->sa_family != sa2->sa_family || sa1->sa_len != sa2->sa_len) + return (1); + + switch (sa1->sa_family) { +#ifdef INET + case AF_INET: + return (!key_bbcmp(&satosin(sa1)->sin_addr, + &satosin(sa2)->sin_addr, mask)); +#endif +#ifdef INET6 + case AF_INET6: + if (satosin6(sa1)->sin6_scope_id != + satosin6(sa2)->sin6_scope_id) + return (1); + return (!key_bbcmp(&satosin6(sa1)->sin6_addr, + &satosin6(sa2)->sin6_addr, mask)); +#endif + } + return (1); +} #undef satosin #undef satosin6 -} /* * compare two buffers with mask. @@ -4307,185 +4284,256 @@ key_bbcmp(const void *a1, const void *a2, u_int bits) static void key_flush_spd(time_t now) { - static u_int16_t sptree_scangen = 0; - u_int16_t gen = sptree_scangen++; - struct secpolicy *sp; + SPTREE_RLOCK_TRACKER; + struct secpolicy_list drainq; + struct secpolicy *sp, *nextsp; u_int dir; - /* SPD */ + LIST_INIT(&drainq); + SPTREE_RLOCK(); for (dir = 0; dir < IPSEC_DIR_MAX; dir++) { -restart: - SPTREE_LOCK(); - LIST_FOREACH(sp, &V_sptree[dir], chain) { - if (sp->scangen == gen) /* previously handled */ - continue; - sp->scangen = gen; - if (sp->state == IPSEC_SPSTATE_DEAD && - sp->refcnt == 1) { - /* - * Ensure that we only decrease refcnt once, - * when we're the last consumer. - * Directly call SP_DELREF/key_delsp instead - * of KEY_FREESP to avoid unlocking/relocking - * SPTREE_LOCK before key_delsp: may refcnt - * be increased again during that time ? 
- * NB: also clean entries created by - * key_spdflush - */ - SP_DELREF(sp); - key_delsp(sp); - SPTREE_UNLOCK(); - goto restart; - } + TAILQ_FOREACH(sp, &V_sptree[dir], chain) { if (sp->lifetime == 0 && sp->validtime == 0) continue; - if ((sp->lifetime && now - sp->created > sp->lifetime) - || (sp->validtime && now - sp->lastused > sp->validtime)) { - sp->state = IPSEC_SPSTATE_DEAD; - SPTREE_UNLOCK(); - key_spdexpire(sp); - goto restart; + if ((sp->lifetime && + now - sp->created > sp->lifetime) || + (sp->validtime && + now - sp->lastused > sp->validtime)) { + /* Hold extra reference to send SPDEXPIRE */ + SP_ADDREF(sp); + LIST_INSERT_HEAD(&drainq, sp, drainq); } } - SPTREE_UNLOCK(); + } + SPTREE_RUNLOCK(); + if (LIST_EMPTY(&drainq)) + return; + + SPTREE_WLOCK(); + sp = LIST_FIRST(&drainq); + while (sp != NULL) { + nextsp = LIST_NEXT(sp, drainq); + /* Check that SP is still linked */ + if (sp->state != IPSEC_SPSTATE_ALIVE) { + LIST_REMOVE(sp, drainq); + key_freesp(&sp); /* release extra reference */ + sp = nextsp; + continue; + } + TAILQ_REMOVE(&V_sptree[sp->spidx.dir], sp, chain); + LIST_REMOVE(sp, idhash); + sp->state = IPSEC_SPSTATE_DEAD; + sp = nextsp; + } + V_sp_genid++; + SPTREE_WUNLOCK(); + + sp = LIST_FIRST(&drainq); + while (sp != NULL) { + nextsp = LIST_NEXT(sp, drainq); + key_spdexpire(sp); + key_freesp(&sp); /* release extra reference */ + key_freesp(&sp); /* release last reference */ + sp = nextsp; } } static void key_flush_sad(time_t now) { + SAHTREE_RLOCK_TRACKER; + struct secashead_list emptyq; + struct secasvar_list drainq, hexpireq, sexpireq, freeq; struct secashead *sah, *nextsah; struct secasvar *sav, *nextsav; - /* SAD */ - SAHTREE_LOCK(); - LIST_FOREACH_SAFE(sah, &V_sahtree, chain, nextsah) { - /* if sah has been dead, then delete it and process next sah. 
*/ - if (sah->state == SADB_SASTATE_DEAD) { - key_delsah(sah); + LIST_INIT(&drainq); + LIST_INIT(&hexpireq); + LIST_INIT(&sexpireq); + LIST_INIT(&emptyq); + + SAHTREE_RLOCK(); + TAILQ_FOREACH(sah, &V_sahtree, chain) { + /* Check for empty SAH */ + if (TAILQ_EMPTY(&sah->savtree_larval) && + TAILQ_EMPTY(&sah->savtree_alive)) { + SAH_ADDREF(sah); + LIST_INSERT_HEAD(&emptyq, sah, drainq); continue; } - - /* if LARVAL entry doesn't become MATURE, delete it. */ - LIST_FOREACH_SAFE(sav, &sah->savtree[SADB_SASTATE_LARVAL], chain, nextsav) { - /* Need to also check refcnt for a larval SA ??? */ - if (now - sav->created > V_key_larval_lifetime) - KEY_FREESAV(&sav); + /* Add all stale LARVAL SAs into drainq */ + TAILQ_FOREACH(sav, &sah->savtree_larval, chain) { + if (now - sav->created < V_key_larval_lifetime) + continue; + SAV_ADDREF(sav); + LIST_INSERT_HEAD(&drainq, sav, drainq); } - - /* - * check MATURE entry to start to send expire message - * whether or not. - */ - LIST_FOREACH_SAFE(sav, &sah->savtree[SADB_SASTATE_MATURE], chain, nextsav) { - /* we don't need to check. */ - if (sav->lft_s == NULL) + TAILQ_FOREACH(sav, &sah->savtree_alive, chain) { + /* lifetimes aren't specified */ + if (sav->lft_h == NULL) continue; - - /* sanity check */ - if (sav->lft_c == NULL) { - ipseclog((LOG_DEBUG,"%s: there is no CURRENT " - "time, why?\n", __func__)); + SECASVAR_LOCK(sav); + /* + * Check again with lock held, because it may + * be updated by SADB_UPDATE. + */ + if (sav->lft_h == NULL) { + SECASVAR_UNLOCK(sav); continue; } - - /* check SOFT lifetime */ - if (sav->lft_s->addtime != 0 && - now - sav->created > sav->lft_s->addtime) { - key_sa_chgstate(sav, SADB_SASTATE_DYING); - /* - * Actually, only send expire message if - * SA has been used, as it was done before, - * but should we always send such message, - * and let IKE daemon decide if it should be - * renegotiated or not ? 
- * XXX expire message will actually NOT be - * sent if SA is only used after soft - * lifetime has been reached, see below - * (DYING state) - */ - if (sav->lft_c->usetime != 0) - key_expire(sav); - } - /* check SOFT lifetime by bytes */ /* - * XXX I don't know the way to delete this SA - * when new SA is installed. Caution when it's - * installed too big lifetime by time. + * RFC 2367: + * HARD lifetimes MUST take precedence over SOFT + * lifetimes, meaning if the HARD and SOFT lifetimes + * are the same, the HARD lifetime will appear on the + * EXPIRE message. */ - else if (sav->lft_s->bytes != 0 && - sav->lft_s->bytes < sav->lft_c->bytes) { - - key_sa_chgstate(sav, SADB_SASTATE_DYING); - /* - * XXX If we keep to send expire - * message in the status of - * DYING. Do remove below code. - */ - key_expire(sav); - } - } - - /* check DYING entry to change status to DEAD. */ - LIST_FOREACH_SAFE(sav, &sah->savtree[SADB_SASTATE_DYING], chain, nextsav) { - /* we don't need to check. */ - if (sav->lft_h == NULL) + /* check HARD lifetime */ + if ((sav->lft_h->addtime != 0 && + now - sav->created > sav->lft_h->addtime) || + (sav->lft_h->usetime != 0 && sav->firstused && + now - sav->firstused > sav->lft_h->usetime) || + (sav->lft_h->bytes != 0 && counter_u64_fetch( + sav->lft_c_bytes) > sav->lft_h->bytes)) { + SECASVAR_UNLOCK(sav); + SAV_ADDREF(sav); + LIST_INSERT_HEAD(&hexpireq, sav, drainq); continue; - - /* sanity check */ - if (sav->lft_c == NULL) { - ipseclog((LOG_DEBUG, "%s: there is no CURRENT " - "time, why?\n", __func__)); + } + /* check SOFT lifetime (only for MATURE SAs) */ + if (sav->state == SADB_SASTATE_MATURE && ( + (sav->lft_s->addtime != 0 && + now - sav->created > sav->lft_s->addtime) || + (sav->lft_s->usetime != 0 && sav->firstused && + now - sav->firstused > sav->lft_s->usetime) || + (sav->lft_s->bytes != 0 && counter_u64_fetch( + sav->lft_c_bytes) > sav->lft_s->bytes))) { + SECASVAR_UNLOCK(sav); + SAV_ADDREF(sav); + LIST_INSERT_HEAD(&sexpireq, sav, 
drainq); continue; } + SECASVAR_UNLOCK(sav); + } + } + SAHTREE_RUNLOCK(); - if (sav->lft_h->addtime != 0 && - now - sav->created > sav->lft_h->addtime) { - key_sa_chgstate(sav, SADB_SASTATE_DEAD); - KEY_FREESAV(&sav); - } -#if 0 /* XXX Should we keep to send expire message until HARD lifetime ? */ - else if (sav->lft_s != NULL - && sav->lft_s->addtime != 0 - && now - sav->created > sav->lft_s->addtime) { - /* - * XXX: should be checked to be - * installed the valid SA. - */ + if (LIST_EMPTY(&emptyq) && LIST_EMPTY(&drainq) && + LIST_EMPTY(&hexpireq) && LIST_EMPTY(&sexpireq)) + return; - /* - * If there is no SA then sending - * expire message. - */ - key_expire(sav); - } -#endif - /* check HARD lifetime by bytes */ - else if (sav->lft_h->bytes != 0 && - sav->lft_h->bytes < sav->lft_c->bytes) { - key_sa_chgstate(sav, SADB_SASTATE_DEAD); - KEY_FREESAV(&sav); - } + LIST_INIT(&freeq); + SAHTREE_WLOCK(); + /* Unlink stale LARVAL SAs */ + sav = LIST_FIRST(&drainq); + while (sav != NULL) { + nextsav = LIST_NEXT(sav, drainq); + /* Check that SA is still LARVAL */ + if (sav->state != SADB_SASTATE_LARVAL) { + LIST_REMOVE(sav, drainq); + LIST_INSERT_HEAD(&freeq, sav, drainq); + sav = nextsav; + continue; } - - /* delete entry in DEAD */ - LIST_FOREACH_SAFE(sav, &sah->savtree[SADB_SASTATE_DEAD], chain, nextsav) { - /* sanity check */ - if (sav->state != SADB_SASTATE_DEAD) { - ipseclog((LOG_DEBUG, "%s: invalid sav->state " - "(queue: %d SA: %d): kill it anyway\n", - __func__, - SADB_SASTATE_DEAD, sav->state)); - } - /* - * do not call key_freesav() here. - * sav should already be freed, and sav->refcnt - * shows other references to sav - * (such as from SPD). 
- */ + TAILQ_REMOVE(&sav->sah->savtree_larval, sav, chain); + LIST_REMOVE(sav, spihash); + sav->state = SADB_SASTATE_DEAD; + sav = nextsav; + } + /* Unlink all SAs with expired HARD lifetime */ + sav = LIST_FIRST(&hexpireq); + while (sav != NULL) { + nextsav = LIST_NEXT(sav, drainq); + /* Check that SA is not unlinked */ + if (sav->state == SADB_SASTATE_DEAD) { + LIST_REMOVE(sav, drainq); + LIST_INSERT_HEAD(&freeq, sav, drainq); + sav = nextsav; + continue; + } + TAILQ_REMOVE(&sav->sah->savtree_alive, sav, chain); + LIST_REMOVE(sav, spihash); + sav->state = SADB_SASTATE_DEAD; + sav = nextsav; + } + /* Mark all SAs with expired SOFT lifetime as DYING */ + sav = LIST_FIRST(&sexpireq); + while (sav != NULL) { + nextsav = LIST_NEXT(sav, drainq); + /* Check that SA is not unlinked */ + if (sav->state == SADB_SASTATE_DEAD) { + LIST_REMOVE(sav, drainq); + LIST_INSERT_HEAD(&freeq, sav, drainq); + sav = nextsav; + continue; } + /* + * NOTE: this doesn't change SA order in the chain. + */ + sav->state = SADB_SASTATE_DYING; + sav = nextsav; + } + /* Unlink empty SAHs */ + sah = LIST_FIRST(&emptyq); + while (sah != NULL) { + nextsah = LIST_NEXT(sah, drainq); + /* Check that SAH is still empty and not unlinked */ + if (sah->state == SADB_SASTATE_DEAD || + !TAILQ_EMPTY(&sah->savtree_larval) || + !TAILQ_EMPTY(&sah->savtree_alive)) { + LIST_REMOVE(sah, drainq); + key_freesah(&sah); /* release extra reference */ + sah = nextsah; + continue; + } + TAILQ_REMOVE(&V_sahtree, sah, chain); + LIST_REMOVE(sah, addrhash); + sah->state = SADB_SASTATE_DEAD; + sah = nextsah; + } + SAHTREE_WUNLOCK(); + + /* Send EXPIRE messages for the expired SAs (not SPDEXPIRE) */ + sav = LIST_FIRST(&hexpireq); + while (sav != NULL) { + nextsav = LIST_NEXT(sav, drainq); + key_expire(sav, 1); + key_freesah(&sav->sah); /* release reference from SAV */ + key_freesav(&sav); /* release extra reference */ + key_freesav(&sav); /* release last reference */ + sav = nextsav; + } + sav = LIST_FIRST(&sexpireq); + while (sav != NULL) { + nextsav = 
LIST_NEXT(sav, drainq); + key_expire(sav, 0); + key_freesav(&sav); /* release extra reference */ + sav = nextsav; + } + /* Free stale LARVAL SAs */ + sav = LIST_FIRST(&drainq); + while (sav != NULL) { + nextsav = LIST_NEXT(sav, drainq); + key_freesah(&sav->sah); /* release reference from SAV */ + key_freesav(&sav); /* release extra reference */ + key_freesav(&sav); /* release last reference */ + sav = nextsav; + } + /* Free SAs that were unlinked/changed by someone else */ + sav = LIST_FIRST(&freeq); + while (sav != NULL) { + nextsav = LIST_NEXT(sav, drainq); + key_freesav(&sav); /* release extra reference */ + sav = nextsav; + } + /* Free empty SAH */ + sah = LIST_FIRST(&emptyq); + while (sah != NULL) { + nextsah = LIST_NEXT(sah, drainq); + key_freesah(&sah); /* release extra reference */ + key_freesah(&sah); /* release last reference */ + sah = nextsah; } - SAHTREE_UNLOCK(); } static void @@ -4495,13 +4543,16 @@ key_flush_acq(time_t now) /* ACQ tree */ ACQ_LOCK(); - for (acq = LIST_FIRST(&V_acqtree); acq != NULL; acq = nextacq) { + acq = LIST_FIRST(&V_acqtree); + while (acq != NULL) { nextacq = LIST_NEXT(acq, chain); - if (now - acq->created > V_key_blockacq_lifetime - && __LIST_CHAINED(acq)) { + if (now - acq->created > V_key_blockacq_lifetime) { LIST_REMOVE(acq, chain); + LIST_REMOVE(acq, addrhash); + LIST_REMOVE(acq, seqhash); free(acq, M_IPSEC_SAQ); } + acq = nextacq; } ACQ_UNLOCK(); } @@ -4530,8 +4581,8 @@ key_flush_spacq(time_t now) * and do to remove or to expire. * XXX: year 2038 problem may remain. */ -void -key_timehandler(void) +static void +key_timehandler(void *arg) { VNET_ITERATOR_DECL(vnet_iter); time_t now = time_second; @@ -4549,7 +4600,7 @@ key_timehandler(void) #ifndef IPSEC_DEBUG2 /* do exchange to tick time !! 
*/ - (void)timeout((void *)key_timehandler, (void *)0, hz); + callout_schedule(&key_timer, hz); #endif /* IPSEC_DEBUG2 */ } @@ -4563,9 +4614,7 @@ key_random() } void -key_randomfill(p, l) - void *p; - size_t l; +key_randomfill(void *p, size_t l) { size_t n; u_long v; @@ -4594,8 +4643,8 @@ key_randomfill(p, l) * OUT: * 0: invalid satype. */ -static u_int16_t -key_satype2proto(u_int8_t satype) +static uint8_t +key_satype2proto(uint8_t satype) { switch (satype) { case SADB_SATYPE_UNSPEC: @@ -4619,8 +4668,8 @@ key_satype2proto(u_int8_t satype) * OUT: * 0: invalid protocol type. */ -static u_int8_t -key_proto2satype(u_int16_t proto) +static uint8_t +key_proto2satype(uint8_t proto) { switch (proto) { case IPPROTO_AH: @@ -4651,44 +4700,58 @@ key_proto2satype(u_int16_t proto) * other if success, return pointer to the message to send. */ static int -key_getspi(so, m, mhp) - struct socket *so; - struct mbuf *m; - const struct sadb_msghdr *mhp; +key_getspi(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { - struct sadb_address *src0, *dst0; struct secasindex saidx; - struct secashead *newsah; - struct secasvar *newsav; - u_int8_t proto; - u_int32_t spi; - u_int8_t mode; - u_int32_t reqid; + struct sadb_address *src0, *dst0; + struct secasvar *sav; + uint32_t reqid, spi; int error; + uint8_t mode, proto; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); - if (mhp->ext[SADB_EXT_ADDRESS_SRC] == NULL || - mhp->ext[SADB_EXT_ADDRESS_DST] == NULL) { - ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", - __func__)); - return key_senderror(so, m, EINVAL); + if (SADB_CHECKHDR(mhp, SADB_EXT_ADDRESS_SRC) || + SADB_CHECKHDR(mhp, SADB_EXT_ADDRESS_DST) +#ifdef PFKEY_STRICT_CHECKS + || SADB_CHECKHDR(mhp, SADB_EXT_SPIRANGE) +#endif + ) { + ipseclog((LOG_DEBUG, + "%s: invalid message: missing required header.\n", + __func__)); + error = EINVAL; + 
goto fail; } - if (mhp->extlen[SADB_EXT_ADDRESS_SRC] < sizeof(struct sadb_address) || - mhp->extlen[SADB_EXT_ADDRESS_DST] < sizeof(struct sadb_address)) { - ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", - __func__)); - return key_senderror(so, m, EINVAL); + if (SADB_CHECKLEN(mhp, SADB_EXT_ADDRESS_SRC) || + SADB_CHECKLEN(mhp, SADB_EXT_ADDRESS_DST) +#ifdef PFKEY_STRICT_CHECKS + || SADB_CHECKLEN(mhp, SADB_EXT_SPIRANGE) +#endif + ) { + ipseclog((LOG_DEBUG, + "%s: invalid message: wrong header size.\n", __func__)); + error = EINVAL; + goto fail; } - if (mhp->ext[SADB_X_EXT_SA2] != NULL) { - mode = ((struct sadb_x_sa2 *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_mode; - reqid = ((struct sadb_x_sa2 *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_reqid; - } else { + if (SADB_CHECKHDR(mhp, SADB_X_EXT_SA2)) { mode = IPSEC_MODE_ANY; reqid = 0; + } else { + if (SADB_CHECKLEN(mhp, SADB_X_EXT_SA2)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: wrong header size.\n", + __func__)); + error = EINVAL; + goto fail; + } + mode = ((struct sadb_x_sa2 *) + mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_mode; + reqid = ((struct sadb_x_sa2 *) + mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_reqid; } src0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_SRC]); @@ -4698,121 +4761,55 @@ key_getspi(so, m, mhp) if ((proto = key_satype2proto(mhp->msg->sadb_msg_satype)) == 0) { ipseclog((LOG_DEBUG, "%s: invalid satype is passed.\n", __func__)); - return key_senderror(so, m, EINVAL); - } - - /* - * Make sure the port numbers are zero. - * In case of NAT-T we will update them later if needed. 
- */ - switch (((struct sockaddr *)(src0 + 1))->sa_family) { - case AF_INET: - if (((struct sockaddr *)(src0 + 1))->sa_len != - sizeof(struct sockaddr_in)) - return key_senderror(so, m, EINVAL); - ((struct sockaddr_in *)(src0 + 1))->sin_port = 0; - break; - case AF_INET6: - if (((struct sockaddr *)(src0 + 1))->sa_len != - sizeof(struct sockaddr_in6)) - return key_senderror(so, m, EINVAL); - ((struct sockaddr_in6 *)(src0 + 1))->sin6_port = 0; - break; - default: - ; /*???*/ + error = EINVAL; + goto fail; } - switch (((struct sockaddr *)(dst0 + 1))->sa_family) { - case AF_INET: - if (((struct sockaddr *)(dst0 + 1))->sa_len != - sizeof(struct sockaddr_in)) - return key_senderror(so, m, EINVAL); - ((struct sockaddr_in *)(dst0 + 1))->sin_port = 0; - break; - case AF_INET6: - if (((struct sockaddr *)(dst0 + 1))->sa_len != - sizeof(struct sockaddr_in6)) - return key_senderror(so, m, EINVAL); - ((struct sockaddr_in6 *)(dst0 + 1))->sin6_port = 0; - break; - default: - ; /*???*/ + error = key_checksockaddrs((struct sockaddr *)(src0 + 1), + (struct sockaddr *)(dst0 + 1)); + if (error != 0) { + ipseclog((LOG_DEBUG, "%s: invalid sockaddr.\n", __func__)); + error = EINVAL; + goto fail; } - - /* XXX boundary check against sa_len */ KEY_SETSECASIDX(proto, mode, reqid, src0 + 1, dst0 + 1, &saidx); -#ifdef IPSEC_NAT_T - /* - * Handle NAT-T info if present. - * We made sure the port numbers are zero above, so we do - * not have to worry in case we do not update them. 
- */ - if (mhp->ext[SADB_X_EXT_NAT_T_OAI] != NULL) - ipseclog((LOG_DEBUG, "%s: NAT-T OAi present\n", __func__)); - if (mhp->ext[SADB_X_EXT_NAT_T_OAR] != NULL) - ipseclog((LOG_DEBUG, "%s: NAT-T OAr present\n", __func__)); - - if (mhp->ext[SADB_X_EXT_NAT_T_TYPE] != NULL && - mhp->ext[SADB_X_EXT_NAT_T_SPORT] != NULL && - mhp->ext[SADB_X_EXT_NAT_T_DPORT] != NULL) { - struct sadb_x_nat_t_type *type; - struct sadb_x_nat_t_port *sport, *dport; - - if (mhp->extlen[SADB_X_EXT_NAT_T_TYPE] < sizeof(*type) || - mhp->extlen[SADB_X_EXT_NAT_T_SPORT] < sizeof(*sport) || - mhp->extlen[SADB_X_EXT_NAT_T_DPORT] < sizeof(*dport)) { - ipseclog((LOG_DEBUG, "%s: invalid nat-t message " - "passed.\n", __func__)); - return key_senderror(so, m, EINVAL); - } - - sport = (struct sadb_x_nat_t_port *) - mhp->ext[SADB_X_EXT_NAT_T_SPORT]; - dport = (struct sadb_x_nat_t_port *) - mhp->ext[SADB_X_EXT_NAT_T_DPORT]; - - if (sport) - KEY_PORTTOSADDR(&saidx.src, sport->sadb_x_nat_t_port_port); - if (dport) - KEY_PORTTOSADDR(&saidx.dst, dport->sadb_x_nat_t_port_port); - } -#endif - /* SPI allocation */ - spi = key_do_getnewspi((struct sadb_spirange *)mhp->ext[SADB_EXT_SPIRANGE], - &saidx); - if (spi == 0) - return key_senderror(so, m, EINVAL); - - /* get a SA index */ - if ((newsah = key_getsah(&saidx)) == NULL) { - /* create a new SA index */ - if ((newsah = key_newsah(&saidx)) == NULL) { - ipseclog((LOG_DEBUG, "%s: No more memory.\n",__func__)); - return key_senderror(so, m, ENOBUFS); - } + spi = key_do_getnewspi( + (struct sadb_spirange *)mhp->ext[SADB_EXT_SPIRANGE], &saidx); + if (spi == 0) { + /* + * Requested SPI or SPI range is not available or + * already used. + */ + error = EEXIST; + goto fail; } + sav = key_newsav(mhp, &saidx, spi, &error); + if (sav == NULL) + goto fail; - /* get a new SA */ - /* XXX rewrite */ - newsav = KEY_NEWSAV(m, mhp, newsah, &error); - if (newsav == NULL) { - /* XXX don't free new SA index allocated in above. 
*/ - return key_senderror(so, m, error); + if (sav->seq != 0) { + /* + * RFC2367: + * If the SADB_GETSPI message is in response to a + * kernel-generated SADB_ACQUIRE, the sadb_msg_seq + * MUST be the same as the SADB_ACQUIRE message. + * + * XXXAE: However it doesn't define the behaviour how to + * check this and what to do if it doesn't match. + * Also what we should do if it matches? + * + * We can compare saidx used in SADB_ACQUIRE with saidx + * used in SADB_GETSPI, but this probably can break + * existing software. For now just warn if it doesn't match. + * + * XXXAE: anyway it looks useless. + */ + key_acqdone(&saidx, sav->seq); } - - /* set spi */ - newsav->spi = htonl(spi); - - /* delete the entry in acqtree */ - if (mhp->msg->sadb_msg_seq != 0) { - struct secacq *acq; - if ((acq = key_getacqbyseq(mhp->msg->sadb_msg_seq)) != NULL) { - /* reset counter in order to deletion by timehandler. */ - acq->created = time_second; - acq->count = 0; - } - } + KEYDBG(KEY_STAMP, + printf("%s: SA(%p)\n", __func__, sav)); + KEYDBG(KEY_DATA, kdebug_secasv(sav)); { struct mbuf *n, *nn; @@ -4824,16 +4821,17 @@ key_getspi(so, m, mhp) len = PFKEY_ALIGN8(sizeof(struct sadb_msg)) + PFKEY_ALIGN8(sizeof(struct sadb_sa)); - MGETHDR(n, M_DONTWAIT, MT_DATA); + MGETHDR(n, M_NOWAIT, MT_DATA); if (len > MHLEN) { - MCLGET(n, M_DONTWAIT); - if ((n->m_flags & M_EXT) == 0) { + if (!(MCLGET(n, M_NOWAIT))) { m_freem(n); n = NULL; } } - if (!n) - return key_senderror(so, m, ENOBUFS); + if (!n) { + error = ENOBUFS; + goto fail; + } n->m_len = len; n->m_next = NULL; @@ -4845,7 +4843,7 @@ key_getspi(so, m, mhp) m_sa = (struct sadb_sa *)(mtod(n, caddr_t) + off); m_sa->sadb_sa_len = PFKEY_UNIT64(sizeof(struct sadb_sa)); m_sa->sadb_sa_exttype = SADB_EXT_SA; - m_sa->sadb_sa_spi = htonl(spi); + m_sa->sadb_sa_spi = spi; /* SPI is already in network byte order */ off += PFKEY_ALIGN8(sizeof(struct sadb_sa)); IPSEC_ASSERT(off == len, @@ -4855,7 +4853,8 @@ key_getspi(so, m, mhp) SADB_EXT_ADDRESS_DST); if 
(!n->m_next) { m_freem(n); - return key_senderror(so, m, ENOBUFS); + error = ENOBUFS; + goto fail; } if (n->m_len < sizeof(struct sadb_msg)) { @@ -4869,13 +4868,16 @@ key_getspi(so, m, mhp) n->m_pkthdr.len += nn->m_len; newmsg = mtod(n, struct sadb_msg *); - newmsg->sadb_msg_seq = newsav->seq; + newmsg->sadb_msg_seq = sav->seq; newmsg->sadb_msg_errno = 0; newmsg->sadb_msg_len = PFKEY_UNIT64(n->m_pkthdr.len); m_freem(m); return key_sendup_mbuf(so, n, KEY_SENDUP_ONE); } + +fail: + return (key_senderror(so, m, error)); } /* @@ -4883,15 +4885,12 @@ key_getspi(so, m, mhp) * called by key_getspi(). * OUT: * 0: failure. - * others: success. + * others: success, SPI in network byte order. */ -static u_int32_t -key_do_getnewspi(spirange, saidx) - struct sadb_spirange *spirange; - struct secasindex *saidx; +static uint32_t +key_do_getnewspi(struct sadb_spirange *spirange, struct secasindex *saidx) { - u_int32_t newspi; - u_int32_t min, max; + uint32_t min, max, newspi, t; int count = V_key_spi_trycnt; /* set spi range to allocate */ @@ -4904,7 +4903,6 @@ key_do_getnewspi(spirange, saidx) } /* IPCOMP needs 2-byte SPI */ if (saidx->proto == IPPROTO_IPCOMP) { - u_int32_t t; if (min >= 0x10000) min = 0xffff; if (max >= 0x10000) @@ -4915,15 +4913,14 @@ key_do_getnewspi(spirange, saidx) } if (min == max) { - if (key_checkspidup(saidx, min) != NULL) { + if (!key_checkspidup(htonl(min))) { ipseclog((LOG_DEBUG, "%s: SPI %u exists already.\n", - __func__, min)); + __func__, min)); return 0; } count--; /* taking one cost. */ newspi = min; - } else { /* init SPI */ @@ -4933,59 +4930,269 @@ key_do_getnewspi(spirange, saidx) while (count--) { /* generate pseudo-random SPI value ranged. 
*/ newspi = min + (key_random() % (max - min + 1)); - - if (key_checkspidup(saidx, newspi) == NULL) + if (!key_checkspidup(htonl(newspi))) break; } if (count == 0 || newspi == 0) { - ipseclog((LOG_DEBUG, "%s: to allocate spi is failed.\n", - __func__)); + ipseclog((LOG_DEBUG, + "%s: failed to allocate SPI.\n", __func__)); return 0; } } /* statistics */ keystat.getspi_count = - (keystat.getspi_count + V_key_spi_trycnt - count) / 2; + (keystat.getspi_count + V_key_spi_trycnt - count) / 2; - return newspi; + return (htonl(newspi)); } /* - * SADB_UPDATE processing - * receive - * - * from the ikmpd, and update a secasvar entry whose status is SADB_SASTATE_LARVAL. - * and send - * - * to the ikmpd. - * - * m will always be freed. + * Find TCP-MD5 SA with corresponding secasindex. + * If not found, return NULL and fill SPI with usable value if needed. */ -static int -key_update(so, m, mhp) - struct socket *so; - struct mbuf *m; - const struct sadb_msghdr *mhp; +static struct secasvar * +key_getsav_tcpmd5(struct secasindex *saidx, uint32_t *spi) { - struct sadb_sa *sa0; - struct sadb_address *src0, *dst0; -#ifdef IPSEC_NAT_T - struct sadb_x_nat_t_type *type; - struct sadb_x_nat_t_port *sport, *dport; - struct sadb_address *iaddr, *raddr; - struct sadb_x_nat_t_frag *frag; -#endif - struct secasindex saidx; + SAHTREE_RLOCK_TRACKER; struct secashead *sah; struct secasvar *sav; - u_int16_t proto; - u_int8_t mode; - u_int32_t reqid; + + IPSEC_ASSERT(saidx->proto == IPPROTO_TCP, ("wrong proto")); + SAHTREE_RLOCK(); + LIST_FOREACH(sah, SAHADDRHASH_HASH(saidx), addrhash) { + if (sah->saidx.proto != IPPROTO_TCP) + continue; + if (!key_sockaddrcmp(&saidx->dst.sa, &sah->saidx.dst.sa, 0)) + break; + } + if (sah != NULL) { + if (V_key_preferred_oldsa) + sav = TAILQ_LAST(&sah->savtree_alive, secasvar_queue); + else + sav = TAILQ_FIRST(&sah->savtree_alive); + if (sav != NULL) { + SAV_ADDREF(sav); + SAHTREE_RUNLOCK(); + return (sav); + } + } + if (spi == NULL) { + /* No SPI required */ + 
SAHTREE_RUNLOCK(); + return (NULL); + } + /* Check that SPI is unique */ + LIST_FOREACH(sav, SAVHASH_HASH(*spi), spihash) { + if (sav->spi == *spi) + break; + } + if (sav == NULL) { + SAHTREE_RUNLOCK(); + /* SPI is already unique */ + return (NULL); + } + SAHTREE_RUNLOCK(); + /* XXX: not optimal */ + *spi = key_do_getnewspi(NULL, saidx); + return (NULL); +} + +static int +key_updateaddresses(struct socket *so, struct mbuf *m, + const struct sadb_msghdr *mhp, struct secasvar *sav, + struct secasindex *saidx) +{ + struct sockaddr *newaddr; + struct secashead *sah; + struct secasvar *newsav, *tmp; + struct mbuf *n; + int error, isnew; + + /* Check that we need to change SAH */ + if (!SADB_CHECKHDR(mhp, SADB_X_EXT_NEW_ADDRESS_SRC)) { + newaddr = (struct sockaddr *)( + ((struct sadb_address *) + mhp->ext[SADB_X_EXT_NEW_ADDRESS_SRC]) + 1); + bcopy(newaddr, &saidx->src, newaddr->sa_len); + key_porttosaddr(&saidx->src.sa, 0); + } + if (!SADB_CHECKHDR(mhp, SADB_X_EXT_NEW_ADDRESS_DST)) { + newaddr = (struct sockaddr *)( + ((struct sadb_address *) + mhp->ext[SADB_X_EXT_NEW_ADDRESS_DST]) + 1); + bcopy(newaddr, &saidx->dst, newaddr->sa_len); + key_porttosaddr(&saidx->dst.sa, 0); + } + if (!SADB_CHECKHDR(mhp, SADB_X_EXT_NEW_ADDRESS_SRC) || + !SADB_CHECKHDR(mhp, SADB_X_EXT_NEW_ADDRESS_DST)) { + error = key_checksockaddrs(&saidx->src.sa, &saidx->dst.sa); + if (error != 0) { + ipseclog((LOG_DEBUG, "%s: invalid new sockaddr.\n", + __func__)); + return (error); + } + + sah = key_getsah(saidx); + if (sah == NULL) { + /* create a new SA index */ + sah = key_newsah(saidx); + if (sah == NULL) { + ipseclog((LOG_DEBUG, + "%s: No more memory.\n", __func__)); + return (ENOBUFS); + } + isnew = 2; /* SAH is new */ + } else + isnew = 1; /* existing SAH is referenced */ + } else { + /* + * src and dst addresses are still the same. + * Do we want to change NAT-T config? 
+ */ + if (sav->sah->saidx.proto != IPPROTO_ESP || + SADB_CHECKHDR(mhp, SADB_X_EXT_NAT_T_TYPE) || + SADB_CHECKHDR(mhp, SADB_X_EXT_NAT_T_SPORT) || + SADB_CHECKHDR(mhp, SADB_X_EXT_NAT_T_DPORT)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: missing required header.\n", + __func__)); + return (EINVAL); + } + /* We hold reference to SA, thus SAH will be referenced too. */ + sah = sav->sah; + isnew = 0; + } + + newsav = malloc(sizeof(struct secasvar), M_IPSEC_SA, + M_NOWAIT | M_ZERO); + if (newsav == NULL) { + ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); + error = ENOBUFS; + goto fail; + } + + /* Clone SA's content into newsav */ + SAV_INITREF(newsav); + bcopy(sav, newsav, offsetof(struct secasvar, chain)); + /* + * We create new NAT-T config for newsav (not sav) if it is needed. + * Old NAT-T config will be freed by key_cleansav() when + * last reference to SA will be released. + */ + newsav->natt = NULL; + newsav->sah = sah; + newsav->state = SADB_SASTATE_MATURE; + error = key_setnatt(newsav, mhp); + if (error != 0) + goto fail; + + SAHTREE_WLOCK(); + /* Check that SA is still alive */ + if (sav->state == SADB_SASTATE_DEAD) { + /* SA was unlinked */ + SAHTREE_WUNLOCK(); + error = ESRCH; + goto fail; + } + + /* Unlink SA from SAH and SPI hash */ + IPSEC_ASSERT((sav->flags & SADB_X_EXT_F_CLONED) == 0, + ("SA is already cloned")); + IPSEC_ASSERT(sav->state == SADB_SASTATE_MATURE || + sav->state == SADB_SASTATE_DYING, + ("Wrong SA state %u\n", sav->state)); + TAILQ_REMOVE(&sav->sah->savtree_alive, sav, chain); + LIST_REMOVE(sav, spihash); + sav->state = SADB_SASTATE_DEAD; + + /* + * Link new SA with SAH. Keep SAs ordered by + * create time (newer are first). + */ + TAILQ_FOREACH(tmp, &sah->savtree_alive, chain) { + if (newsav->created > tmp->created) { + TAILQ_INSERT_BEFORE(tmp, newsav, chain); + break; + } + } + if (tmp == NULL) + TAILQ_INSERT_TAIL(&sah->savtree_alive, newsav, chain); + + /* Add new SA into SPI hash. 
*/ + LIST_INSERT_HEAD(SAVHASH_HASH(newsav->spi), newsav, spihash); + + /* Add new SAH into SADB. */ + if (isnew == 2) { + TAILQ_INSERT_HEAD(&V_sahtree, sah, chain); + LIST_INSERT_HEAD(SAHADDRHASH_HASH(saidx), sah, addrhash); + sah->state = SADB_SASTATE_MATURE; + SAH_ADDREF(sah); /* newsav references new SAH */ + } + /* + * isnew == 1 -> @sah was referenced by key_getsah(). + * isnew == 0 -> we use the same @sah, that was used by @sav, + * and we use its reference for @newsav. + */ + SECASVAR_LOCK(sav); + /* XXX: replace cntr with pointer? */ + newsav->cntr = sav->cntr; + sav->flags |= SADB_X_EXT_F_CLONED; + SECASVAR_UNLOCK(sav); + + SAHTREE_WUNLOCK(); + + KEYDBG(KEY_STAMP, + printf("%s: SA(%p) cloned into SA(%p)\n", + __func__, sav, newsav)); + KEYDBG(KEY_DATA, kdebug_secasv(newsav)); + + key_freesav(&sav); /* release last reference */ + + /* set msg buf from mhp */ + n = key_getmsgbuf_x1(m, mhp); + if (n == NULL) { + ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); + return (ENOBUFS); + } + m_freem(m); + key_sendup_mbuf(so, n, KEY_SENDUP_ALL); + return (0); +fail: + if (isnew != 0) + key_freesah(&sah); + if (newsav != NULL) { + if (newsav->natt != NULL) + free(newsav->natt, M_IPSEC_MISC); + free(newsav, M_IPSEC_SA); + } + return (error); +} + +/* + * SADB_UPDATE processing + * receive + * + * from the ikmpd, and update a secasvar entry whose status is SADB_SASTATE_LARVAL. + * and send + * + * to the ikmpd. + * + * m will always be freed. 
+ */ +static int +key_update(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) +{ + struct secasindex saidx; + struct sadb_address *src0, *dst0; + struct sadb_sa *sa0; + struct secasvar *sav; + uint32_t reqid; int error; + uint8_t mode, proto; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(m != NULL, ("null mbuf")); @@ -4995,199 +5202,182 @@ key_update(so, m, mhp) /* map satype to proto */ if ((proto = key_satype2proto(mhp->msg->sadb_msg_satype)) == 0) { ipseclog((LOG_DEBUG, "%s: invalid satype is passed.\n", - __func__)); + __func__)); return key_senderror(so, m, EINVAL); } - if (mhp->ext[SADB_EXT_SA] == NULL || - mhp->ext[SADB_EXT_ADDRESS_SRC] == NULL || - mhp->ext[SADB_EXT_ADDRESS_DST] == NULL || - (mhp->msg->sadb_msg_satype == SADB_SATYPE_ESP && - mhp->ext[SADB_EXT_KEY_ENCRYPT] == NULL) || - (mhp->msg->sadb_msg_satype == SADB_SATYPE_AH && - mhp->ext[SADB_EXT_KEY_AUTH] == NULL) || - (mhp->ext[SADB_EXT_LIFETIME_HARD] != NULL && - mhp->ext[SADB_EXT_LIFETIME_SOFT] == NULL) || - (mhp->ext[SADB_EXT_LIFETIME_HARD] == NULL && - mhp->ext[SADB_EXT_LIFETIME_SOFT] != NULL)) { - ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", - __func__)); + if (SADB_CHECKHDR(mhp, SADB_EXT_SA) || + SADB_CHECKHDR(mhp, SADB_EXT_ADDRESS_SRC) || + SADB_CHECKHDR(mhp, SADB_EXT_ADDRESS_DST) || + (SADB_CHECKHDR(mhp, SADB_EXT_LIFETIME_HARD) && + !SADB_CHECKHDR(mhp, SADB_EXT_LIFETIME_SOFT)) || + (SADB_CHECKHDR(mhp, SADB_EXT_LIFETIME_SOFT) && + !SADB_CHECKHDR(mhp, SADB_EXT_LIFETIME_HARD))) { + ipseclog((LOG_DEBUG, + "%s: invalid message: missing required header.\n", + __func__)); return key_senderror(so, m, EINVAL); } - if (mhp->extlen[SADB_EXT_SA] < sizeof(struct sadb_sa) || - mhp->extlen[SADB_EXT_ADDRESS_SRC] < sizeof(struct sadb_address) || - mhp->extlen[SADB_EXT_ADDRESS_DST] < sizeof(struct sadb_address)) { - ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", - __func__)); + if (SADB_CHECKLEN(mhp, SADB_EXT_SA) || + SADB_CHECKLEN(mhp, SADB_EXT_ADDRESS_SRC) || 
+ SADB_CHECKLEN(mhp, SADB_EXT_ADDRESS_DST)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: wrong header size.\n", __func__)); return key_senderror(so, m, EINVAL); } - if (mhp->ext[SADB_X_EXT_SA2] != NULL) { - mode = ((struct sadb_x_sa2 *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_mode; - reqid = ((struct sadb_x_sa2 *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_reqid; - } else { + if (SADB_CHECKHDR(mhp, SADB_X_EXT_SA2)) { mode = IPSEC_MODE_ANY; reqid = 0; + } else { + if (SADB_CHECKLEN(mhp, SADB_X_EXT_SA2)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: wrong header size.\n", + __func__)); + return key_senderror(so, m, EINVAL); + } + mode = ((struct sadb_x_sa2 *) + mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_mode; + reqid = ((struct sadb_x_sa2 *) + mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_reqid; } - /* XXX boundary checking for other extensions */ sa0 = (struct sadb_sa *)mhp->ext[SADB_EXT_SA]; src0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_SRC]); dst0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_DST]); - /* XXX boundary check against sa_len */ - KEY_SETSECASIDX(proto, mode, reqid, src0 + 1, dst0 + 1, &saidx); - /* - * Make sure the port numbers are zero. - * In case of NAT-T we will update them later if needed. + * Only SADB_SASTATE_MATURE SAs may be submitted in an + * SADB_UPDATE message. */ - KEY_PORTTOSADDR(&saidx.src, 0); - KEY_PORTTOSADDR(&saidx.dst, 0); - -#ifdef IPSEC_NAT_T - /* - * Handle NAT-T info if present. 
- */ - if (mhp->ext[SADB_X_EXT_NAT_T_TYPE] != NULL && - mhp->ext[SADB_X_EXT_NAT_T_SPORT] != NULL && - mhp->ext[SADB_X_EXT_NAT_T_DPORT] != NULL) { - - if (mhp->extlen[SADB_X_EXT_NAT_T_TYPE] < sizeof(*type) || - mhp->extlen[SADB_X_EXT_NAT_T_SPORT] < sizeof(*sport) || - mhp->extlen[SADB_X_EXT_NAT_T_DPORT] < sizeof(*dport)) { - ipseclog((LOG_DEBUG, "%s: invalid message.\n", - __func__)); - return key_senderror(so, m, EINVAL); - } - - type = (struct sadb_x_nat_t_type *) - mhp->ext[SADB_X_EXT_NAT_T_TYPE]; - sport = (struct sadb_x_nat_t_port *) - mhp->ext[SADB_X_EXT_NAT_T_SPORT]; - dport = (struct sadb_x_nat_t_port *) - mhp->ext[SADB_X_EXT_NAT_T_DPORT]; - } else { - type = 0; - sport = dport = 0; - } - if (mhp->ext[SADB_X_EXT_NAT_T_OAI] != NULL && - mhp->ext[SADB_X_EXT_NAT_T_OAR] != NULL) { - if (mhp->extlen[SADB_X_EXT_NAT_T_OAI] < sizeof(*iaddr) || - mhp->extlen[SADB_X_EXT_NAT_T_OAR] < sizeof(*raddr)) { - ipseclog((LOG_DEBUG, "%s: invalid message\n", - __func__)); - return key_senderror(so, m, EINVAL); - } - iaddr = (struct sadb_address *)mhp->ext[SADB_X_EXT_NAT_T_OAI]; - raddr = (struct sadb_address *)mhp->ext[SADB_X_EXT_NAT_T_OAR]; - ipseclog((LOG_DEBUG, "%s: NAT-T OAi/r present\n", __func__)); - } else { - iaddr = raddr = NULL; - } - if (mhp->ext[SADB_X_EXT_NAT_T_FRAG] != NULL) { - if (mhp->extlen[SADB_X_EXT_NAT_T_FRAG] < sizeof(*frag)) { - ipseclog((LOG_DEBUG, "%s: invalid message\n", - __func__)); - return key_senderror(so, m, EINVAL); - } - frag = (struct sadb_x_nat_t_frag *) - mhp->ext[SADB_X_EXT_NAT_T_FRAG]; - } else { - frag = 0; - } + if (sa0->sadb_sa_state != SADB_SASTATE_MATURE) { + ipseclog((LOG_DEBUG, "%s: invalid state.\n", __func__)); +#ifdef PFKEY_STRICT_CHECKS + return key_senderror(so, m, EINVAL); #endif - - /* get a SA header */ - if ((sah = key_getsah(&saidx)) == NULL) { - ipseclog((LOG_DEBUG, "%s: no SA index found.\n", __func__)); - return key_senderror(so, m, ENOENT); } - - /* set spidx if there */ - /* XXX rewrite */ - error = key_setident(sah, 
m, mhp); - if (error) + error = key_checksockaddrs((struct sockaddr *)(src0 + 1), + (struct sockaddr *)(dst0 + 1)); + if (error != 0) { + ipseclog((LOG_DEBUG, "%s: invalid sockaddr.\n", __func__)); return key_senderror(so, m, error); - - /* find a SA with sequence number. */ -#ifdef IPSEC_DOSEQCHECK - if (mhp->msg->sadb_msg_seq != 0 - && (sav = key_getsavbyseq(sah, mhp->msg->sadb_msg_seq)) == NULL) { - ipseclog((LOG_DEBUG, "%s: no larval SA with sequence %u " - "exists.\n", __func__, mhp->msg->sadb_msg_seq)); - return key_senderror(so, m, ENOENT); } -#else - SAHTREE_LOCK(); - sav = key_getsavbyspi(sah, sa0->sadb_sa_spi); - SAHTREE_UNLOCK(); + KEY_SETSECASIDX(proto, mode, reqid, src0 + 1, dst0 + 1, &saidx); + sav = key_getsavbyspi(sa0->sadb_sa_spi); if (sav == NULL) { - ipseclog((LOG_DEBUG, "%s: no such a SA found (spi:%u)\n", - __func__, (u_int32_t)ntohl(sa0->sadb_sa_spi))); - return key_senderror(so, m, EINVAL); - } -#endif - - /* validity check */ - if (sav->sah->saidx.proto != proto) { - ipseclog((LOG_DEBUG, "%s: protocol mismatched " - "(DB=%u param=%u)\n", __func__, - sav->sah->saidx.proto, proto)); - return key_senderror(so, m, EINVAL); - } -#ifdef IPSEC_DOSEQCHECK - if (sav->spi != sa0->sadb_sa_spi) { - ipseclog((LOG_DEBUG, "%s: SPI mismatched (DB:%u param:%u)\n", - __func__, - (u_int32_t)ntohl(sav->spi), - (u_int32_t)ntohl(sa0->sadb_sa_spi))); + ipseclog((LOG_DEBUG, "%s: no SA found for SPI %u\n", + __func__, ntohl(sa0->sadb_sa_spi))); return key_senderror(so, m, EINVAL); } -#endif + /* + * Check that SADB_UPDATE issued by the same process that did + * SADB_GETSPI or SADB_ADD. + */ if (sav->pid != mhp->msg->sadb_msg_pid) { - ipseclog((LOG_DEBUG, "%s: pid mismatched (DB:%u param:%u)\n", - __func__, sav->pid, mhp->msg->sadb_msg_pid)); + ipseclog((LOG_DEBUG, + "%s: pid mismatched (SPI %u, pid %u vs. 
%u)\n", __func__, + ntohl(sav->spi), sav->pid, mhp->msg->sadb_msg_pid)); + key_freesav(&sav); return key_senderror(so, m, EINVAL); } - - /* copy sav values */ - error = key_setsaval(sav, m, mhp); - if (error) { - KEY_FREESAV(&sav); - return key_senderror(so, m, error); - } - -#ifdef IPSEC_NAT_T - /* - * Handle more NAT-T info if present, - * now that we have a sav to fill. - */ - if (type) - sav->natt_type = type->sadb_x_nat_t_type_type; - - if (sport) - KEY_PORTTOSADDR(&sav->sah->saidx.src, - sport->sadb_x_nat_t_port_port); - if (dport) - KEY_PORTTOSADDR(&sav->sah->saidx.dst, - dport->sadb_x_nat_t_port_port); - -#if 0 - /* - * In case SADB_X_EXT_NAT_T_FRAG was not given, leave it at 0. - * We should actually check for a minimum MTU here, if we - * want to support it in ip_output. - */ - if (frag) - sav->natt_esp_frag_len = frag->sadb_x_nat_t_frag_fraglen; -#endif -#endif - - /* check SA values to be mature. */ - if ((mhp->msg->sadb_msg_errno = key_mature(sav)) != 0) { - KEY_FREESAV(&sav); - return key_senderror(so, m, 0); + /* saidx should match with SA. */ + if (key_cmpsaidx(&sav->sah->saidx, &saidx, CMP_MODE_REQID) == 0) { + ipseclog((LOG_DEBUG, "%s: saidx mismatched for SPI %u", + __func__, ntohl(sav->spi))); + key_freesav(&sav); + return key_senderror(so, m, ESRCH); + } + + if (sav->state == SADB_SASTATE_LARVAL) { + if ((mhp->msg->sadb_msg_satype == SADB_SATYPE_ESP && + SADB_CHECKHDR(mhp, SADB_EXT_KEY_ENCRYPT)) || + (mhp->msg->sadb_msg_satype == SADB_SATYPE_AH && + SADB_CHECKHDR(mhp, SADB_EXT_KEY_AUTH))) { + ipseclog((LOG_DEBUG, + "%s: invalid message: missing required header.\n", + __func__)); + key_freesav(&sav); + return key_senderror(so, m, EINVAL); + } + /* + * We can set any values except src, dst and SPI. 
+ */ + error = key_setsaval(sav, mhp); + if (error != 0) { + key_freesav(&sav); + return (key_senderror(so, m, error)); + } + /* Change SA state to MATURE */ + SAHTREE_WLOCK(); + if (sav->state != SADB_SASTATE_LARVAL) { + /* SA was deleted or another thread made it MATURE. */ + SAHTREE_WUNLOCK(); + key_freesav(&sav); + return (key_senderror(so, m, ESRCH)); + } + /* + * NOTE: we keep SAs in savtree_alive ordered by created + * time. When SA's state changed from LARVAL to MATURE, + * we update its created time in key_setsaval() and move + * it into head of savtree_alive. + */ + TAILQ_REMOVE(&sav->sah->savtree_larval, sav, chain); + TAILQ_INSERT_HEAD(&sav->sah->savtree_alive, sav, chain); + sav->state = SADB_SASTATE_MATURE; + SAHTREE_WUNLOCK(); + } else { + /* + * For DYING and MATURE SA we can change only state + * and lifetimes. Report EINVAL if something else attempted + * to change. + */ + if (!SADB_CHECKHDR(mhp, SADB_EXT_KEY_ENCRYPT) || + !SADB_CHECKHDR(mhp, SADB_EXT_KEY_AUTH)) { + key_freesav(&sav); + return (key_senderror(so, m, EINVAL)); + } + error = key_updatelifetimes(sav, mhp); + if (error != 0) { + key_freesav(&sav); + return (key_senderror(so, m, error)); + } + /* + * This is FreeBSD extension to RFC2367. + * IKEd can specify SADB_X_EXT_NEW_ADDRESS_SRC and/or + * SADB_X_EXT_NEW_ADDRESS_DST when it wants to change + * SA addresses (for example to implement MOBIKE protocol + * as described in RFC4555). Also we allow to change + * NAT-T config. 
+ */ + if (!SADB_CHECKHDR(mhp, SADB_X_EXT_NEW_ADDRESS_SRC) || + !SADB_CHECKHDR(mhp, SADB_X_EXT_NEW_ADDRESS_DST) || + !SADB_CHECKHDR(mhp, SADB_X_EXT_NAT_T_TYPE) || + sav->natt != NULL) { + error = key_updateaddresses(so, m, mhp, sav, &saidx); + key_freesav(&sav); + if (error != 0) + return (key_senderror(so, m, error)); + return (0); + } + /* Check that SA is still alive */ + SAHTREE_WLOCK(); + if (sav->state == SADB_SASTATE_DEAD) { + /* SA was unlinked */ + SAHTREE_WUNLOCK(); + key_freesav(&sav); + return (key_senderror(so, m, ESRCH)); + } + /* + * NOTE: there is possible state moving from DYING to MATURE, + * but this doesn't change created time, so we won't reorder + * this SA. + */ + sav->state = SADB_SASTATE_MATURE; + SAHTREE_WUNLOCK(); } + KEYDBG(KEY_STAMP, + printf("%s: SA(%p)\n", __func__, sav)); + KEYDBG(KEY_DATA, kdebug_secasv(sav)); + key_freesav(&sav); { struct mbuf *n; @@ -5204,42 +5394,6 @@ key_update(so, m, mhp) } } -/* - * search SAD with sequence for a SA which state is SADB_SASTATE_LARVAL. - * only called by key_update(). - * OUT: - * NULL : not found - * others : found, pointer to a SA. - */ -#ifdef IPSEC_DOSEQCHECK -static struct secasvar * -key_getsavbyseq(sah, seq) - struct secashead *sah; - u_int32_t seq; -{ - struct secasvar *sav; - u_int state; - - state = SADB_SASTATE_LARVAL; - - /* search SAD with sequence number ? */ - LIST_FOREACH(sav, &sah->savtree[state], chain) { - - KEY_CHKSASTATE(state, sav->state, __func__); - - if (sav->seq == seq) { - sa_addref(sav); - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP %s cause refcnt++:%d SA:%p\n", - __func__, sav->refcnt, sav)); - return sav; - } - } - - return NULL; -} -#endif - /* * SADB_ADD processing * add an entry to SA database, when received @@ -5256,24 +5410,14 @@ key_getsavbyseq(sah, seq) * m will always be freed. 
*/ static int -key_add(so, m, mhp) - struct socket *so; - struct mbuf *m; - const struct sadb_msghdr *mhp; +key_add(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { - struct sadb_sa *sa0; - struct sadb_address *src0, *dst0; -#ifdef IPSEC_NAT_T - struct sadb_x_nat_t_type *type; - struct sadb_address *iaddr, *raddr; - struct sadb_x_nat_t_frag *frag; -#endif struct secasindex saidx; - struct secashead *newsah; - struct secasvar *newsav; - u_int16_t proto; - u_int8_t mode; - u_int32_t reqid; + struct sadb_address *src0, *dst0; + struct sadb_sa *sa0; + struct secasvar *sav; + uint32_t reqid, spi; + uint8_t mode, proto; int error; IPSEC_ASSERT(so != NULL, ("null socket")); @@ -5284,176 +5428,115 @@ key_add(so, m, mhp) /* map satype to proto */ if ((proto = key_satype2proto(mhp->msg->sadb_msg_satype)) == 0) { ipseclog((LOG_DEBUG, "%s: invalid satype is passed.\n", - __func__)); + __func__)); return key_senderror(so, m, EINVAL); } - if (mhp->ext[SADB_EXT_SA] == NULL || - mhp->ext[SADB_EXT_ADDRESS_SRC] == NULL || - mhp->ext[SADB_EXT_ADDRESS_DST] == NULL || - (mhp->msg->sadb_msg_satype == SADB_SATYPE_ESP && - mhp->ext[SADB_EXT_KEY_ENCRYPT] == NULL) || - (mhp->msg->sadb_msg_satype == SADB_SATYPE_AH && - mhp->ext[SADB_EXT_KEY_AUTH] == NULL) || - (mhp->ext[SADB_EXT_LIFETIME_HARD] != NULL && - mhp->ext[SADB_EXT_LIFETIME_SOFT] == NULL) || - (mhp->ext[SADB_EXT_LIFETIME_HARD] == NULL && - mhp->ext[SADB_EXT_LIFETIME_SOFT] != NULL)) { - ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", - __func__)); + if (SADB_CHECKHDR(mhp, SADB_EXT_SA) || + SADB_CHECKHDR(mhp, SADB_EXT_ADDRESS_SRC) || + SADB_CHECKHDR(mhp, SADB_EXT_ADDRESS_DST) || + (mhp->msg->sadb_msg_satype == SADB_SATYPE_ESP && ( + SADB_CHECKHDR(mhp, SADB_EXT_KEY_ENCRYPT) || + SADB_CHECKLEN(mhp, SADB_EXT_KEY_ENCRYPT))) || + (mhp->msg->sadb_msg_satype == SADB_SATYPE_AH && ( + SADB_CHECKHDR(mhp, SADB_EXT_KEY_AUTH) || + SADB_CHECKLEN(mhp, SADB_EXT_KEY_AUTH))) || + (SADB_CHECKHDR(mhp, SADB_EXT_LIFETIME_HARD) 
&& + !SADB_CHECKHDR(mhp, SADB_EXT_LIFETIME_SOFT)) || + (SADB_CHECKHDR(mhp, SADB_EXT_LIFETIME_SOFT) && + !SADB_CHECKHDR(mhp, SADB_EXT_LIFETIME_HARD))) { + ipseclog((LOG_DEBUG, + "%s: invalid message: missing required header.\n", + __func__)); return key_senderror(so, m, EINVAL); } - if (mhp->extlen[SADB_EXT_SA] < sizeof(struct sadb_sa) || - mhp->extlen[SADB_EXT_ADDRESS_SRC] < sizeof(struct sadb_address) || - mhp->extlen[SADB_EXT_ADDRESS_DST] < sizeof(struct sadb_address)) { - /* XXX need more */ - ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", - __func__)); + if (SADB_CHECKLEN(mhp, SADB_EXT_SA) || + SADB_CHECKLEN(mhp, SADB_EXT_ADDRESS_SRC) || + SADB_CHECKLEN(mhp, SADB_EXT_ADDRESS_DST)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: wrong header size.\n", __func__)); return key_senderror(so, m, EINVAL); } - if (mhp->ext[SADB_X_EXT_SA2] != NULL) { - mode = ((struct sadb_x_sa2 *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_mode; - reqid = ((struct sadb_x_sa2 *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_reqid; - } else { + if (SADB_CHECKHDR(mhp, SADB_X_EXT_SA2)) { mode = IPSEC_MODE_ANY; reqid = 0; + } else { + if (SADB_CHECKLEN(mhp, SADB_X_EXT_SA2)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: wrong header size.\n", + __func__)); + return key_senderror(so, m, EINVAL); + } + mode = ((struct sadb_x_sa2 *) + mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_mode; + reqid = ((struct sadb_x_sa2 *) + mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_reqid; } sa0 = (struct sadb_sa *)mhp->ext[SADB_EXT_SA]; src0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_SRC]; dst0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_DST]; - /* XXX boundary check against sa_len */ - KEY_SETSECASIDX(proto, mode, reqid, src0 + 1, dst0 + 1, &saidx); - /* - * Make sure the port numbers are zero. - * In case of NAT-T we will update them later if needed. + * Only SADB_SASTATE_MATURE SAs may be submitted in an + * SADB_ADD message. 
*/ - KEY_PORTTOSADDR(&saidx.src, 0); - KEY_PORTTOSADDR(&saidx.dst, 0); - -#ifdef IPSEC_NAT_T - /* - * Handle NAT-T info if present. - */ - if (mhp->ext[SADB_X_EXT_NAT_T_TYPE] != NULL && - mhp->ext[SADB_X_EXT_NAT_T_SPORT] != NULL && - mhp->ext[SADB_X_EXT_NAT_T_DPORT] != NULL) { - struct sadb_x_nat_t_port *sport, *dport; - - if (mhp->extlen[SADB_X_EXT_NAT_T_TYPE] < sizeof(*type) || - mhp->extlen[SADB_X_EXT_NAT_T_SPORT] < sizeof(*sport) || - mhp->extlen[SADB_X_EXT_NAT_T_DPORT] < sizeof(*dport)) { - ipseclog((LOG_DEBUG, "%s: invalid message.\n", - __func__)); - return key_senderror(so, m, EINVAL); - } - - type = (struct sadb_x_nat_t_type *) - mhp->ext[SADB_X_EXT_NAT_T_TYPE]; - sport = (struct sadb_x_nat_t_port *) - mhp->ext[SADB_X_EXT_NAT_T_SPORT]; - dport = (struct sadb_x_nat_t_port *) - mhp->ext[SADB_X_EXT_NAT_T_DPORT]; - - if (sport) - KEY_PORTTOSADDR(&saidx.src, - sport->sadb_x_nat_t_port_port); - if (dport) - KEY_PORTTOSADDR(&saidx.dst, - dport->sadb_x_nat_t_port_port); - } else { - type = 0; + if (sa0->sadb_sa_state != SADB_SASTATE_MATURE) { + ipseclog((LOG_DEBUG, "%s: invalid state.\n", __func__)); +#ifdef PFKEY_STRICT_CHECKS + return key_senderror(so, m, EINVAL); +#endif } - if (mhp->ext[SADB_X_EXT_NAT_T_OAI] != NULL && - mhp->ext[SADB_X_EXT_NAT_T_OAR] != NULL) { - if (mhp->extlen[SADB_X_EXT_NAT_T_OAI] < sizeof(*iaddr) || - mhp->extlen[SADB_X_EXT_NAT_T_OAR] < sizeof(*raddr)) { - ipseclog((LOG_DEBUG, "%s: invalid message\n", - __func__)); - return key_senderror(so, m, EINVAL); - } - iaddr = (struct sadb_address *)mhp->ext[SADB_X_EXT_NAT_T_OAI]; - raddr = (struct sadb_address *)mhp->ext[SADB_X_EXT_NAT_T_OAR]; - ipseclog((LOG_DEBUG, "%s: NAT-T OAi/r present\n", __func__)); - } else { - iaddr = raddr = NULL; + error = key_checksockaddrs((struct sockaddr *)(src0 + 1), + (struct sockaddr *)(dst0 + 1)); + if (error != 0) { + ipseclog((LOG_DEBUG, "%s: invalid sockaddr.\n", __func__)); + return key_senderror(so, m, error); } - if (mhp->ext[SADB_X_EXT_NAT_T_FRAG] != 
NULL) { - if (mhp->extlen[SADB_X_EXT_NAT_T_FRAG] < sizeof(*frag)) { - ipseclog((LOG_DEBUG, "%s: invalid message\n", + KEY_SETSECASIDX(proto, mode, reqid, src0 + 1, dst0 + 1, &saidx); + spi = sa0->sadb_sa_spi; + /* + * For TCP-MD5 SAs we don't use SPI. Check the uniqueness using + * secasindex. + * XXXAE: IPComp seems also doesn't use SPI. + */ + if (proto == IPPROTO_TCP) { + sav = key_getsav_tcpmd5(&saidx, &spi); + if (sav == NULL && spi == 0) { + /* Failed to allocate SPI */ + ipseclog((LOG_DEBUG, "%s: SA already exists.\n", __func__)); - return key_senderror(so, m, EINVAL); + return key_senderror(so, m, EEXIST); } - frag = (struct sadb_x_nat_t_frag *) - mhp->ext[SADB_X_EXT_NAT_T_FRAG]; + /* XXX: SPI that we report back can have another value */ } else { - frag = 0; - } -#endif - - /* get a SA header */ - if ((newsah = key_getsah(&saidx)) == NULL) { - /* create a new SA header */ - if ((newsah = key_newsah(&saidx)) == NULL) { - ipseclog((LOG_DEBUG, "%s: No more memory.\n",__func__)); - return key_senderror(so, m, ENOBUFS); - } + /* We can create new SA only if SPI is different. */ + sav = key_getsavbyspi(spi); } - - /* set spidx if there */ - /* XXX rewrite */ - error = key_setident(newsah, m, mhp); - if (error) { - return key_senderror(so, m, error); - } - - /* create new SA entry. */ - /* We can create new SA only if SPI is differenct. */ - SAHTREE_LOCK(); - newsav = key_getsavbyspi(newsah, sa0->sadb_sa_spi); - SAHTREE_UNLOCK(); - if (newsav != NULL) { + if (sav != NULL) { + key_freesav(&sav); ipseclog((LOG_DEBUG, "%s: SA already exists.\n", __func__)); return key_senderror(so, m, EEXIST); } - newsav = KEY_NEWSAV(m, mhp, newsah, &error); - if (newsav == NULL) { - return key_senderror(so, m, error); - } -#ifdef IPSEC_NAT_T - /* - * Handle more NAT-T info if present, - * now that we have a sav to fill. - */ - if (type) - newsav->natt_type = type->sadb_x_nat_t_type_type; - -#if 0 - /* - * In case SADB_X_EXT_NAT_T_FRAG was not given, leave it at 0. 
- * We should actually check for a minimum MTU here, if we - * want to support it in ip_output. - */ - if (frag) - newsav->natt_esp_frag_len = frag->sadb_x_nat_t_frag_fraglen; -#endif -#endif - - /* check SA values to be mature. */ - if ((error = key_mature(newsav)) != 0) { - KEY_FREESAV(&newsav); + sav = key_newsav(mhp, &saidx, spi, &error); + if (sav == NULL) return key_senderror(so, m, error); - } - + KEYDBG(KEY_STAMP, + printf("%s: return SA(%p)\n", __func__, sav)); + KEYDBG(KEY_DATA, kdebug_secasv(sav)); /* - * don't call key_freesav() here, as we would like to keep the SA - * in the database on success. + * If SADB_ADD was in response to SADB_ACQUIRE, we need to schedule + * ACQ for deletion. */ + if (sav->seq != 0) + key_acqdone(&saidx, sav->seq); { + /* + * Don't call key_freesav() on error here, as we would like to + * keep the SA in the database. + */ struct mbuf *n; /* set msg buf from mhp */ @@ -5463,38 +5546,204 @@ key_add(so, m, mhp) return key_senderror(so, m, ENOBUFS); } - m_freem(m); - return key_sendup_mbuf(so, n, KEY_SENDUP_ALL); - } + m_freem(m); + return key_sendup_mbuf(so, n, KEY_SENDUP_ALL); + } +} + +/* + * NAT-T support. + * IKEd may request the use ESP in UDP encapsulation when it detects the + * presence of NAT. It uses NAT-T extension headers for such SAs to specify + * parameters needed for encapsulation and decapsulation. These PF_KEY + * extension headers are not standardized, so this comment addresses our + * implementation. + * SADB_X_EXT_NAT_T_TYPE specifies type of encapsulation, we support only + * UDP_ENCAP_ESPINUDP as described in RFC3948. + * SADB_X_EXT_NAT_T_SPORT/DPORT specifies source and destination ports for + * UDP header. We use these ports in UDP encapsulation procedure, also we + * can check them in UDP decapsulation procedure. + * SADB_X_EXT_NAT_T_OA[IR] specifies original address of initiator or + * responder. These addresses can be used for transport mode to adjust + * checksum after decapsulation and decryption. 
Since original IP addresses + * used by peer usually different (we detected presence of NAT), TCP/UDP + * pseudo header checksum and IP header checksum was calculated using original + * addresses. After decapsulation and decryption we need to adjust checksum + * to have correct datagram. + * + * We expect presence of NAT-T extension headers only in SADB_ADD and + * SADB_UPDATE messages. We report NAT-T extension headers in replies + * to SADB_ADD, SADB_UPDATE, SADB_GET, and SADB_DUMP messages. + */ +static int +key_setnatt(struct secasvar *sav, const struct sadb_msghdr *mhp) +{ + struct sadb_x_nat_t_port *port; + struct sadb_x_nat_t_type *type; + struct sadb_address *oai, *oar; + struct sockaddr *sa; + uint32_t addr; + uint16_t cksum; + + IPSEC_ASSERT(sav->natt == NULL, ("natt is already initialized")); + /* + * Ignore NAT-T headers if sproto isn't ESP. + */ + if (sav->sah->saidx.proto != IPPROTO_ESP) + return (0); + + if (!SADB_CHECKHDR(mhp, SADB_X_EXT_NAT_T_TYPE) && + !SADB_CHECKHDR(mhp, SADB_X_EXT_NAT_T_SPORT) && + !SADB_CHECKHDR(mhp, SADB_X_EXT_NAT_T_DPORT)) { + if (SADB_CHECKLEN(mhp, SADB_X_EXT_NAT_T_TYPE) || + SADB_CHECKLEN(mhp, SADB_X_EXT_NAT_T_SPORT) || + SADB_CHECKLEN(mhp, SADB_X_EXT_NAT_T_DPORT)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: wrong header size.\n", + __func__)); + return (EINVAL); + } + } else + return (0); + + type = (struct sadb_x_nat_t_type *)mhp->ext[SADB_X_EXT_NAT_T_TYPE]; + if (type->sadb_x_nat_t_type_type != UDP_ENCAP_ESPINUDP) { + ipseclog((LOG_DEBUG, "%s: unsupported NAT-T type %u.\n", + __func__, type->sadb_x_nat_t_type_type)); + return (EINVAL); + } + /* + * Allocate storage for NAT-T config. + * On error it will be released by key_cleansav(). 
+ */ + sav->natt = malloc(sizeof(struct secnatt), M_IPSEC_MISC, + M_NOWAIT | M_ZERO); + if (sav->natt == NULL) { + PFKEYSTAT_INC(in_nomem); + ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); + return (ENOBUFS); + } + port = (struct sadb_x_nat_t_port *)mhp->ext[SADB_X_EXT_NAT_T_SPORT]; + if (port->sadb_x_nat_t_port_port == 0) { + ipseclog((LOG_DEBUG, "%s: invalid NAT-T sport specified.\n", + __func__)); + return (EINVAL); + } + sav->natt->sport = port->sadb_x_nat_t_port_port; + port = (struct sadb_x_nat_t_port *)mhp->ext[SADB_X_EXT_NAT_T_DPORT]; + if (port->sadb_x_nat_t_port_port == 0) { + ipseclog((LOG_DEBUG, "%s: invalid NAT-T dport specified.\n", + __func__)); + return (EINVAL); + } + sav->natt->dport = port->sadb_x_nat_t_port_port; + + /* + * SADB_X_EXT_NAT_T_OAI and SADB_X_EXT_NAT_T_OAR are optional + * and needed only for transport mode IPsec. + * Usually NAT translates only one address, but it is possible, + * that both addresses could be translated. + * NOTE: Value of SADB_X_EXT_NAT_T_OAI is equal to SADB_X_EXT_NAT_T_OA. 
+ */ + if (!SADB_CHECKHDR(mhp, SADB_X_EXT_NAT_T_OAI)) { + if (SADB_CHECKLEN(mhp, SADB_X_EXT_NAT_T_OAI)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: wrong header size.\n", + __func__)); + return (EINVAL); + } + oai = (struct sadb_address *)mhp->ext[SADB_X_EXT_NAT_T_OAI]; + } else + oai = NULL; + if (!SADB_CHECKHDR(mhp, SADB_X_EXT_NAT_T_OAR)) { + if (SADB_CHECKLEN(mhp, SADB_X_EXT_NAT_T_OAR)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: wrong header size.\n", + __func__)); + return (EINVAL); + } + oar = (struct sadb_address *)mhp->ext[SADB_X_EXT_NAT_T_OAR]; + } else + oar = NULL; + + /* Initialize addresses only for transport mode */ + if (sav->sah->saidx.mode != IPSEC_MODE_TUNNEL) { + cksum = 0; + if (oai != NULL) { + /* Currently we support only AF_INET */ + sa = (struct sockaddr *)(oai + 1); + if (sa->sa_family != AF_INET || + sa->sa_len != sizeof(struct sockaddr_in)) { + ipseclog((LOG_DEBUG, + "%s: wrong NAT-OAi header.\n", + __func__)); + return (EINVAL); + } + /* Ignore address if it the same */ + if (((struct sockaddr_in *)sa)->sin_addr.s_addr != + sav->sah->saidx.src.sin.sin_addr.s_addr) { + bcopy(sa, &sav->natt->oai.sa, sa->sa_len); + sav->natt->flags |= IPSEC_NATT_F_OAI; + /* Calculate checksum delta */ + addr = sav->sah->saidx.src.sin.sin_addr.s_addr; + cksum = in_addword(cksum, ~addr >> 16); + cksum = in_addword(cksum, ~addr & 0xffff); + addr = sav->natt->oai.sin.sin_addr.s_addr; + cksum = in_addword(cksum, addr >> 16); + cksum = in_addword(cksum, addr & 0xffff); + } + } + if (oar != NULL) { + /* Currently we support only AF_INET */ + sa = (struct sockaddr *)(oar + 1); + if (sa->sa_family != AF_INET || + sa->sa_len != sizeof(struct sockaddr_in)) { + ipseclog((LOG_DEBUG, + "%s: wrong NAT-OAr header.\n", + __func__)); + return (EINVAL); + } + /* Ignore address if it the same */ + if (((struct sockaddr_in *)sa)->sin_addr.s_addr != + sav->sah->saidx.dst.sin.sin_addr.s_addr) { + bcopy(sa, &sav->natt->oar.sa, sa->sa_len); + sav->natt->flags |= 
IPSEC_NATT_F_OAR; + /* Calculate checksum delta */ + addr = sav->sah->saidx.dst.sin.sin_addr.s_addr; + cksum = in_addword(cksum, ~addr >> 16); + cksum = in_addword(cksum, ~addr & 0xffff); + addr = sav->natt->oar.sin.sin_addr.s_addr; + cksum = in_addword(cksum, addr >> 16); + cksum = in_addword(cksum, addr & 0xffff); + } + } + sav->natt->cksum = cksum; + } + return (0); } -/* m is retained */ static int -key_setident(sah, m, mhp) - struct secashead *sah; - struct mbuf *m; - const struct sadb_msghdr *mhp; +key_setident(struct secashead *sah, const struct sadb_msghdr *mhp) { const struct sadb_ident *idsrc, *iddst; int idsrclen, iddstlen; IPSEC_ASSERT(sah != NULL, ("null secashead")); - IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); /* don't make buffer if not there */ - if (mhp->ext[SADB_EXT_IDENTITY_SRC] == NULL && - mhp->ext[SADB_EXT_IDENTITY_DST] == NULL) { + if (SADB_CHECKHDR(mhp, SADB_EXT_IDENTITY_SRC) && + SADB_CHECKHDR(mhp, SADB_EXT_IDENTITY_DST)) { sah->idents = NULL; sah->identd = NULL; - return 0; + return (0); } - - if (mhp->ext[SADB_EXT_IDENTITY_SRC] == NULL || - mhp->ext[SADB_EXT_IDENTITY_DST] == NULL) { + + if (SADB_CHECKHDR(mhp, SADB_EXT_IDENTITY_SRC) || + SADB_CHECKHDR(mhp, SADB_EXT_IDENTITY_DST)) { ipseclog((LOG_DEBUG, "%s: invalid identity.\n", __func__)); - return EINVAL; + return (EINVAL); } idsrc = (const struct sadb_ident *)mhp->ext[SADB_EXT_IDENTITY_SRC]; @@ -5543,12 +5792,13 @@ key_setident(sah, m, mhp) /* * m will not be freed on return. - * it is caller's responsibility to free the result. + * it is caller's responsibility to free the result. + * + * Called from SADB_ADD and SADB_UPDATE. Reply will contain headers + * from the request in defined order. 
*/ static struct mbuf * -key_getmsgbuf_x1(m, mhp) - struct mbuf *m; - const struct sadb_msghdr *mhp; +key_getmsgbuf_x1(struct mbuf *m, const struct sadb_msghdr *mhp) { struct mbuf *n; @@ -5557,11 +5807,15 @@ key_getmsgbuf_x1(m, mhp) IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); /* create new sadb_msg to reply. */ - n = key_gather_mbuf(m, mhp, 1, 9, SADB_EXT_RESERVED, + n = key_gather_mbuf(m, mhp, 1, 16, SADB_EXT_RESERVED, SADB_EXT_SA, SADB_X_EXT_SA2, SADB_EXT_ADDRESS_SRC, SADB_EXT_ADDRESS_DST, SADB_EXT_LIFETIME_HARD, SADB_EXT_LIFETIME_SOFT, - SADB_EXT_IDENTITY_SRC, SADB_EXT_IDENTITY_DST); + SADB_EXT_IDENTITY_SRC, SADB_EXT_IDENTITY_DST, + SADB_X_EXT_NAT_T_TYPE, SADB_X_EXT_NAT_T_SPORT, + SADB_X_EXT_NAT_T_DPORT, SADB_X_EXT_NAT_T_OAI, + SADB_X_EXT_NAT_T_OAR, SADB_X_EXT_NEW_ADDRESS_SRC, + SADB_X_EXT_NEW_ADDRESS_DST); if (!n) return NULL; @@ -5577,9 +5831,6 @@ key_getmsgbuf_x1(m, mhp) return n; } -static int key_delete_all __P((struct socket *, struct mbuf *, - const struct sadb_msghdr *, u_int16_t)); - /* * SADB_DELETE processing * receive @@ -5592,17 +5843,13 @@ static int key_delete_all __P((struct socket *, struct mbuf *, * m will always be freed. 
*/ static int -key_delete(so, m, mhp) - struct socket *so; - struct mbuf *m; - const struct sadb_msghdr *mhp; +key_delete(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { - struct sadb_sa *sa0; - struct sadb_address *src0, *dst0; struct secasindex saidx; - struct secashead *sah; - struct secasvar *sav = NULL; - u_int16_t proto; + struct sadb_address *src0, *dst0; + struct secasvar *sav; + struct sadb_sa *sa0; + uint8_t proto; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(m != NULL, ("null mbuf")); @@ -5612,110 +5859,70 @@ key_delete(so, m, mhp) /* map satype to proto */ if ((proto = key_satype2proto(mhp->msg->sadb_msg_satype)) == 0) { ipseclog((LOG_DEBUG, "%s: invalid satype is passed.\n", - __func__)); + __func__)); return key_senderror(so, m, EINVAL); } - if (mhp->ext[SADB_EXT_ADDRESS_SRC] == NULL || - mhp->ext[SADB_EXT_ADDRESS_DST] == NULL) { + if (SADB_CHECKHDR(mhp, SADB_EXT_ADDRESS_SRC) || + SADB_CHECKHDR(mhp, SADB_EXT_ADDRESS_DST) || + SADB_CHECKLEN(mhp, SADB_EXT_ADDRESS_SRC) || + SADB_CHECKLEN(mhp, SADB_EXT_ADDRESS_DST)) { ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", - __func__)); + __func__)); return key_senderror(so, m, EINVAL); } - if (mhp->extlen[SADB_EXT_ADDRESS_SRC] < sizeof(struct sadb_address) || - mhp->extlen[SADB_EXT_ADDRESS_DST] < sizeof(struct sadb_address)) { - ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", - __func__)); - return key_senderror(so, m, EINVAL); - } + src0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_SRC]); + dst0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_DST]); - if (mhp->ext[SADB_EXT_SA] == NULL) { + if (key_checksockaddrs((struct sockaddr *)(src0 + 1), + (struct sockaddr *)(dst0 + 1)) != 0) { + ipseclog((LOG_DEBUG, "%s: invalid sockaddr.\n", __func__)); + return (key_senderror(so, m, EINVAL)); + } + KEY_SETSECASIDX(proto, IPSEC_MODE_ANY, 0, src0 + 1, dst0 + 1, &saidx); + if (SADB_CHECKHDR(mhp, SADB_EXT_SA)) { /* * Caller wants us to delete all non-LARVAL SAs * 
that match the src/dst. This is used during * IKE INITIAL-CONTACT. + * XXXAE: this looks like some extension to RFC2367. */ ipseclog((LOG_DEBUG, "%s: doing delete all.\n", __func__)); - return key_delete_all(so, m, mhp, proto); - } else if (mhp->extlen[SADB_EXT_SA] < sizeof(struct sadb_sa)) { - ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", - __func__)); - return key_senderror(so, m, EINVAL); - } - - sa0 = (struct sadb_sa *)mhp->ext[SADB_EXT_SA]; - src0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_SRC]); - dst0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_DST]); - - /* XXX boundary check against sa_len */ - KEY_SETSECASIDX(proto, IPSEC_MODE_ANY, 0, src0 + 1, dst0 + 1, &saidx); - - /* - * Make sure the port numbers are zero. - * In case of NAT-T we will update them later if needed. - */ - KEY_PORTTOSADDR(&saidx.src, 0); - KEY_PORTTOSADDR(&saidx.dst, 0); - -#ifdef IPSEC_NAT_T - /* - * Handle NAT-T info if present. - */ - if (mhp->ext[SADB_X_EXT_NAT_T_SPORT] != NULL && - mhp->ext[SADB_X_EXT_NAT_T_DPORT] != NULL) { - struct sadb_x_nat_t_port *sport, *dport; - - if (mhp->extlen[SADB_X_EXT_NAT_T_SPORT] < sizeof(*sport) || - mhp->extlen[SADB_X_EXT_NAT_T_DPORT] < sizeof(*dport)) { - ipseclog((LOG_DEBUG, "%s: invalid message.\n", - __func__)); - return key_senderror(so, m, EINVAL); - } - - sport = (struct sadb_x_nat_t_port *) - mhp->ext[SADB_X_EXT_NAT_T_SPORT]; - dport = (struct sadb_x_nat_t_port *) - mhp->ext[SADB_X_EXT_NAT_T_DPORT]; - - if (sport) - KEY_PORTTOSADDR(&saidx.src, - sport->sadb_x_nat_t_port_port); - if (dport) - KEY_PORTTOSADDR(&saidx.dst, - dport->sadb_x_nat_t_port_port); - } -#endif - - /* get a SA header */ - SAHTREE_LOCK(); - LIST_FOREACH(sah, &V_sahtree, chain) { - if (sah->state == SADB_SASTATE_DEAD) - continue; - if (key_cmpsaidx(&sah->saidx, &saidx, CMP_HEAD) == 0) - continue; - - /* get a SA with SPI. 
*/ - sav = key_getsavbyspi(sah, sa0->sadb_sa_spi); - if (sav) - break; + return (key_delete_all(so, m, mhp, &saidx)); } - if (sah == NULL) { - SAHTREE_UNLOCK(); - ipseclog((LOG_DEBUG, "%s: no SA found.\n", __func__)); - return key_senderror(so, m, ENOENT); + if (SADB_CHECKLEN(mhp, SADB_EXT_SA)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: wrong header size.\n", __func__)); + return (key_senderror(so, m, EINVAL)); } - - key_sa_chgstate(sav, SADB_SASTATE_DEAD); - KEY_FREESAV(&sav); - SAHTREE_UNLOCK(); + sa0 = (struct sadb_sa *)mhp->ext[SADB_EXT_SA]; + if (proto == IPPROTO_TCP) + sav = key_getsav_tcpmd5(&saidx, NULL); + else + sav = key_getsavbyspi(sa0->sadb_sa_spi); + if (sav == NULL) { + ipseclog((LOG_DEBUG, "%s: no SA found for SPI %u.\n", + __func__, ntohl(sa0->sadb_sa_spi))); + return (key_senderror(so, m, ESRCH)); + } + if (key_cmpsaidx(&sav->sah->saidx, &saidx, CMP_HEAD) == 0) { + ipseclog((LOG_DEBUG, "%s: saidx mismatched for SPI %u.\n", + __func__, ntohl(sav->spi))); + key_freesav(&sav); + return (key_senderror(so, m, ESRCH)); + } + KEYDBG(KEY_STAMP, + printf("%s: SA(%p)\n", __func__, sav)); + KEYDBG(KEY_DATA, kdebug_secasv(sav)); + key_unlinksav(sav); + key_freesav(&sav); { struct mbuf *n; struct sadb_msg *newmsg; /* create new sadb_msg to reply. */ - /* XXX-BZ NAT-T extensions? */ n = key_gather_mbuf(m, mhp, 1, 4, SADB_EXT_RESERVED, SADB_EXT_SA, SADB_EXT_ADDRESS_SRC, SADB_EXT_ADDRESS_DST); if (!n) @@ -5739,95 +5946,44 @@ key_delete(so, m, mhp) * delete all SAs for src/dst. Called from key_delete(). 
*/ static int -key_delete_all(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp, - u_int16_t proto) +key_delete_all(struct socket *so, struct mbuf *m, + const struct sadb_msghdr *mhp, struct secasindex *saidx) { - struct sadb_address *src0, *dst0; - struct secasindex saidx; + struct secasvar_queue drainq; struct secashead *sah; struct secasvar *sav, *nextsav; - u_int stateidx, state; - - src0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_SRC]); - dst0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_DST]); - - /* XXX boundary check against sa_len */ - KEY_SETSECASIDX(proto, IPSEC_MODE_ANY, 0, src0 + 1, dst0 + 1, &saidx); - - /* - * Make sure the port numbers are zero. - * In case of NAT-T we will update them later if needed. - */ - KEY_PORTTOSADDR(&saidx.src, 0); - KEY_PORTTOSADDR(&saidx.dst, 0); - -#ifdef IPSEC_NAT_T - /* - * Handle NAT-T info if present. - */ - - if (mhp->ext[SADB_X_EXT_NAT_T_SPORT] != NULL && - mhp->ext[SADB_X_EXT_NAT_T_DPORT] != NULL) { - struct sadb_x_nat_t_port *sport, *dport; - - if (mhp->extlen[SADB_X_EXT_NAT_T_SPORT] < sizeof(*sport) || - mhp->extlen[SADB_X_EXT_NAT_T_DPORT] < sizeof(*dport)) { - ipseclog((LOG_DEBUG, "%s: invalid message.\n", - __func__)); - return key_senderror(so, m, EINVAL); - } - - sport = (struct sadb_x_nat_t_port *) - mhp->ext[SADB_X_EXT_NAT_T_SPORT]; - dport = (struct sadb_x_nat_t_port *) - mhp->ext[SADB_X_EXT_NAT_T_DPORT]; - - if (sport) - KEY_PORTTOSADDR(&saidx.src, - sport->sadb_x_nat_t_port_port); - if (dport) - KEY_PORTTOSADDR(&saidx.dst, - dport->sadb_x_nat_t_port_port); - } -#endif - SAHTREE_LOCK(); - LIST_FOREACH(sah, &V_sahtree, chain) { - if (sah->state == SADB_SASTATE_DEAD) + TAILQ_INIT(&drainq); + SAHTREE_WLOCK(); + LIST_FOREACH(sah, SAHADDRHASH_HASH(saidx), addrhash) { + if (key_cmpsaidx(&sah->saidx, saidx, CMP_HEAD) == 0) continue; - if (key_cmpsaidx(&sah->saidx, &saidx, CMP_HEAD) == 0) - continue; - - /* Delete all non-LARVAL SAs. 
*/ - for (stateidx = 0; - stateidx < _ARRAYLEN(saorder_state_alive); - stateidx++) { - state = saorder_state_alive[stateidx]; - if (state == SADB_SASTATE_LARVAL) - continue; - for (sav = LIST_FIRST(&sah->savtree[state]); - sav != NULL; sav = nextsav) { - nextsav = LIST_NEXT(sav, chain); - /* sanity check */ - if (sav->state != state) { - ipseclog((LOG_DEBUG, "%s: invalid " - "sav->state (queue %d SA %d)\n", - __func__, state, sav->state)); - continue; - } - - key_sa_chgstate(sav, SADB_SASTATE_DEAD); - KEY_FREESAV(&sav); - } - } + /* Move all ALIVE SAs into drainq */ + TAILQ_CONCAT(&drainq, &sah->savtree_alive, chain); + } + /* Unlink all queued SAs from SPI hash */ + TAILQ_FOREACH(sav, &drainq, chain) { + sav->state = SADB_SASTATE_DEAD; + LIST_REMOVE(sav, spihash); + } + SAHTREE_WUNLOCK(); + /* Now we can release reference for all SAs in drainq */ + sav = TAILQ_FIRST(&drainq); + while (sav != NULL) { + KEYDBG(KEY_STAMP, + printf("%s: SA(%p)\n", __func__, sav)); + KEYDBG(KEY_DATA, kdebug_secasv(sav)); + nextsav = TAILQ_NEXT(sav, chain); + key_freesah(&sav->sah); /* release reference from SAV */ + key_freesav(&sav); /* release last reference */ + sav = nextsav; } - SAHTREE_UNLOCK(); + { struct mbuf *n; struct sadb_msg *newmsg; /* create new sadb_msg to reply. */ - /* XXX-BZ NAT-T extensions? */ n = key_gather_mbuf(m, mhp, 1, 3, SADB_EXT_RESERVED, SADB_EXT_ADDRESS_SRC, SADB_EXT_ADDRESS_DST); if (!n) @@ -5847,6 +6003,52 @@ key_delete_all(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp, } } +/* + * Delete all alive SAs for corresponding xform. + * Larval SAs have not initialized tdb_xform, so it is safe to leave them + * here when xform disappears. 
+ */ +static void +key_delete_xform(const struct xformsw *xsp) +{ + struct secasvar_queue drainq; + struct secashead *sah; + struct secasvar *sav, *nextsav; + + TAILQ_INIT(&drainq); + SAHTREE_WLOCK(); + TAILQ_FOREACH(sah, &V_sahtree, chain) { + sav = TAILQ_FIRST(&sah->savtree_alive); + if (sav == NULL) + continue; + if (sav->tdb_xform != xsp) + continue; + /* + * It is supposed that all SAs in the chain are related to + * one xform. + */ + TAILQ_CONCAT(&drainq, &sah->savtree_alive, chain); + } + /* Unlink all queued SAs from SPI hash */ + TAILQ_FOREACH(sav, &drainq, chain) { + sav->state = SADB_SASTATE_DEAD; + LIST_REMOVE(sav, spihash); + } + SAHTREE_WUNLOCK(); + + /* Now we can release reference for all SAs in drainq */ + sav = TAILQ_FIRST(&drainq); + while (sav != NULL) { + KEYDBG(KEY_STAMP, + printf("%s: SA(%p)\n", __func__, sav)); + KEYDBG(KEY_DATA, kdebug_secasv(sav)); + nextsav = TAILQ_NEXT(sav, chain); + key_freesah(&sav->sah); /* release reference from SAV */ + key_freesav(&sav); /* release last reference */ + sav = nextsav; + } +} + /* * SADB_GET processing * receive @@ -5860,17 +6062,13 @@ key_delete_all(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp, * m will always be freed. 
*/ static int -key_get(so, m, mhp) - struct socket *so; - struct mbuf *m; - const struct sadb_msghdr *mhp; +key_get(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { - struct sadb_sa *sa0; - struct sadb_address *src0, *dst0; struct secasindex saidx; - struct secashead *sah; - struct secasvar *sav = NULL; - u_int16_t proto; + struct sadb_address *src0, *dst0; + struct sadb_sa *sa0; + struct secasvar *sav; + uint8_t proto; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(m != NULL, ("null mbuf")); @@ -5884,18 +6082,19 @@ key_get(so, m, mhp) return key_senderror(so, m, EINVAL); } - if (mhp->ext[SADB_EXT_SA] == NULL || - mhp->ext[SADB_EXT_ADDRESS_SRC] == NULL || - mhp->ext[SADB_EXT_ADDRESS_DST] == NULL) { - ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", - __func__)); + if (SADB_CHECKHDR(mhp, SADB_EXT_SA) || + SADB_CHECKHDR(mhp, SADB_EXT_ADDRESS_SRC) || + SADB_CHECKHDR(mhp, SADB_EXT_ADDRESS_DST)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: missing required header.\n", + __func__)); return key_senderror(so, m, EINVAL); } - if (mhp->extlen[SADB_EXT_SA] < sizeof(struct sadb_sa) || - mhp->extlen[SADB_EXT_ADDRESS_SRC] < sizeof(struct sadb_address) || - mhp->extlen[SADB_EXT_ADDRESS_DST] < sizeof(struct sadb_address)) { - ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", - __func__)); + if (SADB_CHECKLEN(mhp, SADB_EXT_SA) || + SADB_CHECKLEN(mhp, SADB_EXT_ADDRESS_SRC) || + SADB_CHECKLEN(mhp, SADB_EXT_ADDRESS_DST)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: wrong header size.\n", __func__)); return key_senderror(so, m, EINVAL); } @@ -5903,79 +6102,45 @@ key_get(so, m, mhp) src0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_SRC]; dst0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_DST]; - /* XXX boundary check against sa_len */ - KEY_SETSECASIDX(proto, IPSEC_MODE_ANY, 0, src0 + 1, dst0 + 1, &saidx); - - /* - * Make sure the port numbers are zero. - * In case of NAT-T we will update them later if needed. 
- */ - KEY_PORTTOSADDR(&saidx.src, 0); - KEY_PORTTOSADDR(&saidx.dst, 0); - -#ifdef IPSEC_NAT_T - /* - * Handle NAT-T info if present. - */ - - if (mhp->ext[SADB_X_EXT_NAT_T_SPORT] != NULL && - mhp->ext[SADB_X_EXT_NAT_T_DPORT] != NULL) { - struct sadb_x_nat_t_port *sport, *dport; - - if (mhp->extlen[SADB_X_EXT_NAT_T_SPORT] < sizeof(*sport) || - mhp->extlen[SADB_X_EXT_NAT_T_DPORT] < sizeof(*dport)) { - ipseclog((LOG_DEBUG, "%s: invalid message.\n", - __func__)); - return key_senderror(so, m, EINVAL); - } - - sport = (struct sadb_x_nat_t_port *) - mhp->ext[SADB_X_EXT_NAT_T_SPORT]; - dport = (struct sadb_x_nat_t_port *) - mhp->ext[SADB_X_EXT_NAT_T_DPORT]; - - if (sport) - KEY_PORTTOSADDR(&saidx.src, - sport->sadb_x_nat_t_port_port); - if (dport) - KEY_PORTTOSADDR(&saidx.dst, - dport->sadb_x_nat_t_port_port); + if (key_checksockaddrs((struct sockaddr *)(src0 + 1), + (struct sockaddr *)(dst0 + 1)) != 0) { + ipseclog((LOG_DEBUG, "%s: invalid sockaddr.\n", __func__)); + return key_senderror(so, m, EINVAL); } -#endif - - /* get a SA header */ - SAHTREE_LOCK(); - LIST_FOREACH(sah, &V_sahtree, chain) { - if (sah->state == SADB_SASTATE_DEAD) - continue; - if (key_cmpsaidx(&sah->saidx, &saidx, CMP_HEAD) == 0) - continue; + KEY_SETSECASIDX(proto, IPSEC_MODE_ANY, 0, src0 + 1, dst0 + 1, &saidx); - /* get a SA with SPI. 
*/ - sav = key_getsavbyspi(sah, sa0->sadb_sa_spi); - if (sav) - break; - } - SAHTREE_UNLOCK(); - if (sah == NULL) { + if (proto == IPPROTO_TCP) + sav = key_getsav_tcpmd5(&saidx, NULL); + else + sav = key_getsavbyspi(sa0->sadb_sa_spi); + if (sav == NULL) { ipseclog((LOG_DEBUG, "%s: no SA found.\n", __func__)); - return key_senderror(so, m, ENOENT); + return key_senderror(so, m, ESRCH); + } + if (key_cmpsaidx(&sav->sah->saidx, &saidx, CMP_HEAD) == 0) { + ipseclog((LOG_DEBUG, "%s: saidx mismatched for SPI %u.\n", + __func__, ntohl(sa0->sadb_sa_spi))); + key_freesav(&sav); + return (key_senderror(so, m, ESRCH)); } { struct mbuf *n; - u_int8_t satype; + uint8_t satype; /* map proto to satype */ - if ((satype = key_proto2satype(sah->saidx.proto)) == 0) { + if ((satype = key_proto2satype(sav->sah->saidx.proto)) == 0) { ipseclog((LOG_DEBUG, "%s: there was invalid proto in SAD.\n", - __func__)); + __func__)); + key_freesav(&sav); return key_senderror(so, m, EINVAL); } /* create new sadb_msg to reply. */ n = key_setdumpsa(sav, SADB_GET, satype, mhp->msg->sadb_msg_seq, mhp->msg->sadb_msg_pid); + + key_freesav(&sav); if (!n) return key_senderror(so, m, ENOBUFS); @@ -5986,8 +6151,7 @@ key_get(so, m, mhp) /* XXX make it sysctl-configurable? 
*/ static void -key_getcomb_setlifetime(comb) - struct sadb_comb *comb; +key_getcomb_setlifetime(struct sadb_comb *comb) { comb->sadb_comb_soft_allocations = 1; @@ -6005,10 +6169,10 @@ key_getcomb_setlifetime(comb) * XXX no idea if the user wants ESP authentication or not */ static struct mbuf * -key_getcomb_esp() +key_getcomb_ealg(void) { struct sadb_comb *comb; - struct enc_xform *algo; + const struct enc_xform *algo; struct mbuf *result = NULL, *m, *n; int encmin; int i, off, o; @@ -6017,7 +6181,7 @@ key_getcomb_esp() m = NULL; for (i = 1; i <= SADB_EALG_MAX; i++) { - algo = esp_algorithm_lookup(i); + algo = enc_algorithm_lookup(i); if (algo == NULL) continue; @@ -6034,7 +6198,7 @@ key_getcomb_esp() else { IPSEC_ASSERT(l <= MLEN, ("l=%u > MLEN=%lu", l, (u_long) MLEN)); - MGET(m, M_DONTWAIT, MT_DATA); + MGET(m, M_NOWAIT, MT_DATA); if (m) { M_ALIGN(m, l); m->m_len = l; @@ -6079,11 +6243,8 @@ key_getcomb_esp() } static void -key_getsizes_ah( - const struct auth_hash *ah, - int alg, - u_int16_t* min, - u_int16_t* max) +key_getsizes_ah(const struct auth_hash *ah, int alg, u_int16_t* min, + u_int16_t* max) { *min = *max = ah->keysize; @@ -6113,8 +6274,8 @@ key_getsizes_ah( static struct mbuf * key_getcomb_ah() { + const struct auth_hash *algo; struct sadb_comb *comb; - struct auth_hash *algo; struct mbuf *m; u_int16_t minkeysize, maxkeysize; int i; @@ -6131,7 +6292,7 @@ key_getcomb_ah() i != SADB_X_AALG_SHA2_512) continue; #endif - algo = ah_algorithm_lookup(i); + algo = auth_algorithm_lookup(i); if (!algo) continue; key_getsizes_ah(algo, i, &minkeysize, &maxkeysize); @@ -6142,14 +6303,14 @@ key_getcomb_ah() if (!m) { IPSEC_ASSERT(l <= MLEN, ("l=%u > MLEN=%lu", l, (u_long) MLEN)); - MGET(m, M_DONTWAIT, MT_DATA); + MGET(m, M_NOWAIT, MT_DATA); if (m) { M_ALIGN(m, l); m->m_len = l; m->m_next = NULL; } } else - M_PREPEND(m, l, M_DONTWAIT); + M_PREPEND(m, l, M_NOWAIT); if (!m) return NULL; @@ -6171,29 +6332,29 @@ key_getcomb_ah() static struct mbuf * key_getcomb_ipcomp() { 
+ const struct comp_algo *algo; struct sadb_comb *comb; - struct comp_algo *algo; struct mbuf *m; int i; const int l = PFKEY_ALIGN8(sizeof(struct sadb_comb)); m = NULL; for (i = 1; i <= SADB_X_CALG_MAX; i++) { - algo = ipcomp_algorithm_lookup(i); + algo = comp_algorithm_lookup(i); if (!algo) continue; if (!m) { IPSEC_ASSERT(l <= MLEN, ("l=%u > MLEN=%lu", l, (u_long) MLEN)); - MGET(m, M_DONTWAIT, MT_DATA); + MGET(m, M_NOWAIT, MT_DATA); if (m) { M_ALIGN(m, l); m->m_len = l; m->m_next = NULL; } } else - M_PREPEND(m, l, M_DONTWAIT); + M_PREPEND(m, l, M_NOWAIT); if (!m) return NULL; @@ -6213,8 +6374,7 @@ key_getcomb_ipcomp() * XXX sysctl interface to ipsec_{ah,esp}_keymin */ static struct mbuf * -key_getprop(saidx) - const struct secasindex *saidx; +key_getprop(const struct secasindex *saidx) { struct sadb_prop *prop; struct mbuf *m, *n; @@ -6223,7 +6383,7 @@ key_getprop(saidx) switch (saidx->proto) { case IPPROTO_ESP: - m = key_getcomb_esp(); + m = key_getcomb_ealg(); break; case IPPROTO_AH: m = key_getcomb_ah(); @@ -6237,7 +6397,7 @@ key_getprop(saidx) if (!m) return NULL; - M_PREPEND(m, l, M_DONTWAIT); + M_PREPEND(m, l, M_NOWAIT); if (!m) return NULL; @@ -6260,7 +6420,7 @@ key_getprop(saidx) * * to KMD, and expect to receive - * with SADB_ACQUIRE if error occured, + * with SADB_ACQUIRE if error occurred, * or * with SADB_GETSPI * from KMD by PF_KEY. @@ -6277,40 +6437,26 @@ key_getprop(saidx) static int key_acquire(const struct secasindex *saidx, struct secpolicy *sp) { - struct mbuf *result = NULL, *m; - struct secacq *newacq; - u_int8_t satype; - int error = -1; - u_int32_t seq; + union sockaddr_union addr; + struct mbuf *result, *m; + uint32_t seq; + int error; + uint16_t ul_proto; + uint8_t mask, satype; IPSEC_ASSERT(saidx != NULL, ("null saidx")); satype = key_proto2satype(saidx->proto); IPSEC_ASSERT(satype != 0, ("null satype, protocol %u", saidx->proto)); - /* - * We never do anything about acquirng SA. 
There is anather - * solution that kernel blocks to send SADB_ACQUIRE message until - * getting something message from IKEd. In later case, to be - * managed with ACQUIRING list. - */ - /* Get an entry to check whether sending message or not. */ - if ((newacq = key_getacq(saidx)) != NULL) { - if (V_key_blockacq_count < newacq->count) { - /* reset counter and do send message. */ - newacq->count = 0; - } else { - /* increment counter and do nothing. */ - newacq->count++; - return 0; - } - } else { - /* make new entry for blocking to send SADB_ACQUIRE. */ - if ((newacq = key_newacq(saidx)) == NULL) - return ENOBUFS; - } + error = -1; + result = NULL; + ul_proto = IPSEC_ULPROTO_ANY; + /* Get seq number to check whether sending message or not. */ + seq = key_getacq(saidx, &error); + if (seq == 0) + return (error); - seq = newacq->seq; m = key_setsadbmsg(SADB_ACQUIRE, 0, satype, seq, 0, 0); if (!m) { error = ENOBUFS; @@ -6319,21 +6465,69 @@ key_acquire(const struct secasindex *saidx, struct secpolicy *sp) result = m; /* - * No SADB_X_EXT_NAT_T_* here: we do not know - * anything related to NAT-T at this time. + * set sadb_address for saidx's. + * + * Note that if sp is supplied, then we're being called from + * key_allocsa_policy() and should supply port and protocol + * information. + * XXXAE: why only TCP and UDP? ICMP and SCTP looks applicable too. + * XXXAE: probably we can handle this in the ipsec[46]_allocsa(). + * XXXAE: it looks like we should save this info in the ACQ entry. */ - - /* set sadb_address for saidx's. 
*/ - m = key_setsadbaddr(SADB_EXT_ADDRESS_SRC, - &saidx->src.sa, FULLMASK, IPSEC_ULPROTO_ANY); + if (sp != NULL && (sp->spidx.ul_proto == IPPROTO_TCP || + sp->spidx.ul_proto == IPPROTO_UDP)) + ul_proto = sp->spidx.ul_proto; + + addr = saidx->src; + mask = FULLMASK; + if (ul_proto != IPSEC_ULPROTO_ANY) { + switch (sp->spidx.src.sa.sa_family) { + case AF_INET: + if (sp->spidx.src.sin.sin_port != IPSEC_PORT_ANY) { + addr.sin.sin_port = sp->spidx.src.sin.sin_port; + mask = sp->spidx.prefs; + } + break; + case AF_INET6: + if (sp->spidx.src.sin6.sin6_port != IPSEC_PORT_ANY) { + addr.sin6.sin6_port = + sp->spidx.src.sin6.sin6_port; + mask = sp->spidx.prefs; + } + break; + default: + break; + } + } + m = key_setsadbaddr(SADB_EXT_ADDRESS_SRC, &addr.sa, mask, ul_proto); if (!m) { error = ENOBUFS; goto fail; } m_cat(result, m); - m = key_setsadbaddr(SADB_EXT_ADDRESS_DST, - &saidx->dst.sa, FULLMASK, IPSEC_ULPROTO_ANY); + addr = saidx->dst; + mask = FULLMASK; + if (ul_proto != IPSEC_ULPROTO_ANY) { + switch (sp->spidx.dst.sa.sa_family) { + case AF_INET: + if (sp->spidx.dst.sin.sin_port != IPSEC_PORT_ANY) { + addr.sin.sin_port = sp->spidx.dst.sin.sin_port; + mask = sp->spidx.prefd; + } + break; + case AF_INET6: + if (sp->spidx.dst.sin6.sin6_port != IPSEC_PORT_ANY) { + addr.sin6.sin6_port = + sp->spidx.dst.sin6.sin6_port; + mask = sp->spidx.prefd; + } + break; + default: + break; + } + } + m = key_setsadbaddr(SADB_EXT_ADDRESS_DST, &addr.sa, mask, ul_proto); if (!m) { error = ENOBUFS; goto fail; @@ -6343,8 +6537,9 @@ key_acquire(const struct secasindex *saidx, struct secpolicy *sp) /* XXX proxy address (optional) */ /* set sadb_x_policy */ - if (sp) { - m = key_setsadbxpolicy(sp->policy, sp->spidx.dir, sp->id); + if (sp != NULL) { + m = key_setsadbxpolicy(sp->policy, sp->spidx.dir, sp->id, + sp->priority); if (!m) { error = ENOBUFS; goto fail; @@ -6436,6 +6631,10 @@ key_acquire(const struct secasindex *saidx, struct secpolicy *sp) mtod(result, struct sadb_msg *)->sadb_msg_len = 
PFKEY_UNIT64(result->m_pkthdr.len); + KEYDBG(KEY_STAMP, + printf("%s: SP(%p)\n", __func__, sp)); + KEYDBG(KEY_DATA, kdebug_secasindex(saidx, NULL)); + return key_sendup_mbuf(NULL, result, KEY_SENDUP_REGISTERED); fail: @@ -6444,66 +6643,126 @@ key_acquire(const struct secasindex *saidx, struct secpolicy *sp) return error; } -static struct secacq * -key_newacq(const struct secasindex *saidx) +static uint32_t +key_newacq(const struct secasindex *saidx, int *perror) { - struct secacq *newacq; + struct secacq *acq; + uint32_t seq; - /* get new entry */ - newacq = malloc(sizeof(struct secacq), M_IPSEC_SAQ, M_NOWAIT|M_ZERO); - if (newacq == NULL) { + acq = malloc(sizeof(*acq), M_IPSEC_SAQ, M_NOWAIT | M_ZERO); + if (acq == NULL) { ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); - return NULL; + *perror = ENOBUFS; + return (0); } /* copy secindex */ - bcopy(saidx, &newacq->saidx, sizeof(newacq->saidx)); - newacq->seq = (V_acq_seq == ~0 ? 1 : ++V_acq_seq); - newacq->created = time_second; - newacq->count = 0; + bcopy(saidx, &acq->saidx, sizeof(acq->saidx)); + acq->created = time_second; + acq->count = 0; /* add to acqtree */ ACQ_LOCK(); - LIST_INSERT_HEAD(&V_acqtree, newacq, chain); + seq = acq->seq = (V_acq_seq == ~0 ? 1 : ++V_acq_seq); + LIST_INSERT_HEAD(&V_acqtree, acq, chain); + LIST_INSERT_HEAD(ACQADDRHASH_HASH(saidx), acq, addrhash); + LIST_INSERT_HEAD(ACQSEQHASH_HASH(seq), acq, seqhash); ACQ_UNLOCK(); - - return newacq; + *perror = 0; + return (seq); } -static struct secacq * -key_getacq(const struct secasindex *saidx) +static uint32_t +key_getacq(const struct secasindex *saidx, int *perror) { struct secacq *acq; + uint32_t seq; ACQ_LOCK(); - LIST_FOREACH(acq, &V_acqtree, chain) { - if (key_cmpsaidx(saidx, &acq->saidx, CMP_EXACTLY)) + LIST_FOREACH(acq, ACQADDRHASH_HASH(saidx), addrhash) { + if (key_cmpsaidx(&acq->saidx, saidx, CMP_EXACTLY)) { + if (acq->count > V_key_blockacq_count) { + /* + * Reset counter and send message. 
+ * Also reset created time to keep ACQ for + * this saidx. + */ + acq->created = time_second; + acq->count = 0; + seq = acq->seq; + } else { + /* + * Increment counter and do nothing. + * We send SADB_ACQUIRE message only + * for each V_key_blockacq_count packet. + */ + acq->count++; + seq = 0; + } break; + } } ACQ_UNLOCK(); - - return acq; + if (acq != NULL) { + *perror = 0; + return (seq); + } + /* allocate new entry */ + return (key_newacq(saidx, perror)); } -static struct secacq * -key_getacqbyseq(seq) - u_int32_t seq; +static int +key_acqreset(uint32_t seq) +{ + struct secacq *acq; + + ACQ_LOCK(); + LIST_FOREACH(acq, ACQSEQHASH_HASH(seq), seqhash) { + if (acq->seq == seq) { + acq->count = 0; + acq->created = time_second; + break; + } + } + ACQ_UNLOCK(); + if (acq == NULL) + return (ESRCH); + return (0); +} +/* + * Mark ACQ entry as stale to remove it in key_flush_acq(). + * Called after successful SADB_GETSPI message. + */ +static int +key_acqdone(const struct secasindex *saidx, uint32_t seq) { struct secacq *acq; ACQ_LOCK(); - LIST_FOREACH(acq, &V_acqtree, chain) { + LIST_FOREACH(acq, ACQSEQHASH_HASH(seq), seqhash) { if (acq->seq == seq) break; } + if (acq != NULL) { + if (key_cmpsaidx(&acq->saidx, saidx, CMP_EXACTLY) == 0) { + ipseclog((LOG_DEBUG, + "%s: Mismatched saidx for ACQ %u", __func__, seq)); + acq = NULL; + } else { + acq->created = 0; + } + } else { + ipseclog((LOG_DEBUG, + "%s: ACQ %u is not found.", __func__, seq)); + } ACQ_UNLOCK(); - - return acq; + if (acq == NULL) + return (ESRCH); + return (0); } static struct secspacq * -key_newspacq(spidx) - struct secpolicyindex *spidx; +key_newspacq(struct secpolicyindex *spidx) { struct secspacq *acq; @@ -6528,8 +6787,7 @@ key_newspacq(spidx) } static struct secspacq * -key_getspacq(spidx) - struct secpolicyindex *spidx; +key_getspacq(struct secpolicyindex *spidx) { struct secspacq *acq; @@ -6560,16 +6818,15 @@ key_getspacq(spidx) * m will always be freed. 
*/ static int -key_acquire2(so, m, mhp) - struct socket *so; - struct mbuf *m; - const struct sadb_msghdr *mhp; +key_acquire2(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { - const struct sadb_address *src0, *dst0; + SAHTREE_RLOCK_TRACKER; + struct sadb_address *src0, *dst0; struct secasindex saidx; struct secashead *sah; - u_int16_t proto; + uint32_t reqid; int error; + uint8_t mode, proto; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(m != NULL, ("null mbuf")); @@ -6578,35 +6835,28 @@ key_acquire2(so, m, mhp) /* * Error message from KMd. - * We assume that if error was occured in IKEd, the length of PFKEY + * We assume that if error was occurred in IKEd, the length of PFKEY * message is equal to the size of sadb_msg structure. - * We do not raise error even if error occured in this function. + * We do not raise error even if error occurred in this function. */ - if (mhp->msg->sadb_msg_len == PFKEY_UNIT64(sizeof(struct sadb_msg))) { - struct secacq *acq; - - /* check sequence number */ - if (mhp->msg->sadb_msg_seq == 0) { - ipseclog((LOG_DEBUG, "%s: must specify sequence " - "number.\n", __func__)); - m_freem(m); - return 0; - } - - if ((acq = key_getacqbyseq(mhp->msg->sadb_msg_seq)) == NULL) { + if (mhp->msg->sadb_msg_len == PFKEY_UNIT64(sizeof(struct sadb_msg))) { + /* check sequence number */ + if (mhp->msg->sadb_msg_seq == 0 || + mhp->msg->sadb_msg_errno == 0) { + ipseclog((LOG_DEBUG, "%s: must specify sequence " + "number and errno.\n", __func__)); + } else { /* - * the specified larval SA is already gone, or we got - * a bogus sequence number. we can silently ignore it. + * IKEd reported that error occurred. + * XXXAE: what it expects from the kernel? + * Probably we should send SADB_ACQUIRE again? + * If so, reset ACQ's state. + * XXXAE: it looks useless. */ - m_freem(m); - return 0; + key_acqreset(mhp->msg->sadb_msg_seq); } - - /* reset acq counter in order to deletion by timehander. 
*/ - acq->created = time_second; - acq->count = 0; m_freem(m); - return 0; + return (0); } /* @@ -6616,79 +6866,60 @@ key_acquire2(so, m, mhp) /* map satype to proto */ if ((proto = key_satype2proto(mhp->msg->sadb_msg_satype)) == 0) { ipseclog((LOG_DEBUG, "%s: invalid satype is passed.\n", - __func__)); + __func__)); return key_senderror(so, m, EINVAL); } - if (mhp->ext[SADB_EXT_ADDRESS_SRC] == NULL || - mhp->ext[SADB_EXT_ADDRESS_DST] == NULL || - mhp->ext[SADB_EXT_PROPOSAL] == NULL) { - /* error */ - ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", - __func__)); + if (SADB_CHECKHDR(mhp, SADB_EXT_ADDRESS_SRC) || + SADB_CHECKHDR(mhp, SADB_EXT_ADDRESS_DST) || + SADB_CHECKHDR(mhp, SADB_EXT_PROPOSAL)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: missing required header.\n", + __func__)); return key_senderror(so, m, EINVAL); } - if (mhp->extlen[SADB_EXT_ADDRESS_SRC] < sizeof(struct sadb_address) || - mhp->extlen[SADB_EXT_ADDRESS_DST] < sizeof(struct sadb_address) || - mhp->extlen[SADB_EXT_PROPOSAL] < sizeof(struct sadb_prop)) { - /* error */ - ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", - __func__)); + if (SADB_CHECKLEN(mhp, SADB_EXT_ADDRESS_SRC) || + SADB_CHECKLEN(mhp, SADB_EXT_ADDRESS_DST) || + SADB_CHECKLEN(mhp, SADB_EXT_PROPOSAL)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: wrong header size.\n", __func__)); return key_senderror(so, m, EINVAL); } - src0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_SRC]; - dst0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_DST]; - - /* XXX boundary check against sa_len */ - KEY_SETSECASIDX(proto, IPSEC_MODE_ANY, 0, src0 + 1, dst0 + 1, &saidx); - - /* - * Make sure the port numbers are zero. - * In case of NAT-T we will update them later if needed. - */ - KEY_PORTTOSADDR(&saidx.src, 0); - KEY_PORTTOSADDR(&saidx.dst, 0); - -#ifndef IPSEC_NAT_T - /* - * Handle NAT-T info if present. 
- */ - - if (mhp->ext[SADB_X_EXT_NAT_T_SPORT] != NULL && - mhp->ext[SADB_X_EXT_NAT_T_DPORT] != NULL) { - struct sadb_x_nat_t_port *sport, *dport; - - if (mhp->extlen[SADB_X_EXT_NAT_T_SPORT] < sizeof(*sport) || - mhp->extlen[SADB_X_EXT_NAT_T_DPORT] < sizeof(*dport)) { - ipseclog((LOG_DEBUG, "%s: invalid message.\n", + if (SADB_CHECKHDR(mhp, SADB_X_EXT_SA2)) { + mode = IPSEC_MODE_ANY; + reqid = 0; + } else { + if (SADB_CHECKLEN(mhp, SADB_X_EXT_SA2)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: wrong header size.\n", __func__)); return key_senderror(so, m, EINVAL); } + mode = ((struct sadb_x_sa2 *) + mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_mode; + reqid = ((struct sadb_x_sa2 *) + mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_reqid; + } - sport = (struct sadb_x_nat_t_port *) - mhp->ext[SADB_X_EXT_NAT_T_SPORT]; - dport = (struct sadb_x_nat_t_port *) - mhp->ext[SADB_X_EXT_NAT_T_DPORT]; + src0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_SRC]; + dst0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_DST]; - if (sport) - KEY_PORTTOSADDR(&saidx.src, - sport->sadb_x_nat_t_port_port); - if (dport) - KEY_PORTTOSADDR(&saidx.dst, - dport->sadb_x_nat_t_port_port); + error = key_checksockaddrs((struct sockaddr *)(src0 + 1), + (struct sockaddr *)(dst0 + 1)); + if (error != 0) { + ipseclog((LOG_DEBUG, "%s: invalid sockaddr.\n", __func__)); + return key_senderror(so, m, EINVAL); } -#endif + KEY_SETSECASIDX(proto, mode, reqid, src0 + 1, dst0 + 1, &saidx); /* get a SA index */ - SAHTREE_LOCK(); - LIST_FOREACH(sah, &V_sahtree, chain) { - if (sah->state == SADB_SASTATE_DEAD) - continue; + SAHTREE_RLOCK(); + LIST_FOREACH(sah, SAHADDRHASH_HASH(&saidx), addrhash) { if (key_cmpsaidx(&sah->saidx, &saidx, CMP_MODE_REQID)) break; } - SAHTREE_UNLOCK(); + SAHTREE_RUNLOCK(); if (sah != NULL) { ipseclog((LOG_DEBUG, "%s: a SA exists already.\n", __func__)); return key_senderror(so, m, EEXIST); @@ -6696,12 +6927,13 @@ key_acquire2(so, m, mhp) error = key_acquire(&saidx, NULL); if (error != 0) { - 
ipseclog((LOG_DEBUG, "%s: error %d returned from key_acquire\n", - __func__, mhp->msg->sadb_msg_errno)); + ipseclog((LOG_DEBUG, + "%s: error %d returned from key_acquire()\n", + __func__, error)); return key_senderror(so, m, error); } - - return key_sendup_mbuf(so, m, KEY_SENDUP_REGISTERED); + m_freem(m); + return (0); } /* @@ -6718,12 +6950,9 @@ key_acquire2(so, m, mhp) * m will always be freed. */ static int -key_register(so, m, mhp) - struct socket *so; - struct mbuf *m; - const struct sadb_msghdr *mhp; +key_register(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { - struct secreg *reg, *newreg = 0; + struct secreg *reg, *newreg = NULL; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(m != NULL, ("null mbuf")); @@ -6777,14 +7006,14 @@ key_register(so, m, mhp) /* create new sadb_msg to reply. */ alen = 0; for (i = 1; i <= SADB_AALG_MAX; i++) { - if (ah_algorithm_lookup(i)) + if (auth_algorithm_lookup(i)) alen += sizeof(struct sadb_alg); } if (alen) alen += sizeof(struct sadb_supported); elen = 0; for (i = 1; i <= SADB_EALG_MAX; i++) { - if (esp_algorithm_lookup(i)) + if (enc_algorithm_lookup(i)) elen += sizeof(struct sadb_alg); } if (elen) @@ -6795,10 +7024,9 @@ key_register(so, m, mhp) if (len > MCLBYTES) return key_senderror(so, m, ENOBUFS); - MGETHDR(n, M_DONTWAIT, MT_DATA); + MGETHDR(n, M_NOWAIT, MT_DATA); if (len > MHLEN) { - MCLGET(n, M_DONTWAIT); - if ((n->m_flags & M_EXT) == 0) { + if (!(MCLGET(n, M_NOWAIT))) { m_freem(n); n = NULL; } @@ -6824,10 +7052,10 @@ key_register(so, m, mhp) off += PFKEY_ALIGN8(sizeof(*sup)); for (i = 1; i <= SADB_AALG_MAX; i++) { - struct auth_hash *aalgo; + const struct auth_hash *aalgo; u_int16_t minkeysize, maxkeysize; - aalgo = ah_algorithm_lookup(i); + aalgo = auth_algorithm_lookup(i); if (!aalgo) continue; alg = (struct sadb_alg *)(mtod(n, caddr_t) + off); @@ -6848,14 +7076,14 @@ key_register(so, m, mhp) off += PFKEY_ALIGN8(sizeof(*sup)); for (i = 1; i <= SADB_EALG_MAX; i++) { - struct enc_xform 
*ealgo; + const struct enc_xform *ealgo; - ealgo = esp_algorithm_lookup(i); + ealgo = enc_algorithm_lookup(i); if (!ealgo) continue; alg = (struct sadb_alg *)(mtod(n, caddr_t) + off); alg->sadb_alg_id = i; - alg->sadb_alg_ivlen = ealgo->blocksize; + alg->sadb_alg_ivlen = ealgo->ivsize; alg->sadb_alg_minbits = _BITS(ealgo->minkey); alg->sadb_alg_maxbits = _BITS(ealgo->maxkey); off += PFKEY_ALIGN8(sizeof(struct sadb_alg)); @@ -6911,21 +7139,21 @@ key_freereg(struct socket *so) * others : error number */ static int -key_expire(struct secasvar *sav) +key_expire(struct secasvar *sav, int hard) { - int s; - int satype; struct mbuf *result = NULL, *m; - int len; - int error = -1; struct sadb_lifetime *lt; - - /* XXX: Why do we lock ? */ - s = splnet(); /*called from softclock()*/ + uint32_t replay_count; + int error, len; + uint8_t satype; IPSEC_ASSERT (sav != NULL, ("null sav")); IPSEC_ASSERT (sav->sah != NULL, ("null sa header")); + KEYDBG(KEY_STAMP, + printf("%s: SA(%p) expired %s lifetime\n", __func__, + sav, hard ? "hard": "soft")); + KEYDBG(KEY_DATA, kdebug_secasv(sav)); /* set msg header */ satype = key_proto2satype(sav->sah->saidx.proto); IPSEC_ASSERT(satype != 0, ("invalid proto, satype %u", satype)); @@ -6945,8 +7173,11 @@ key_expire(struct secasvar *sav) m_cat(result, m); /* create SA extension */ - m = key_setsadbxsa2(sav->sah->saidx.mode, - sav->replay ? sav->replay->count : 0, + SECASVAR_LOCK(sav); + replay_count = sav->replay ? 
sav->replay->count : 0; + SECASVAR_UNLOCK(sav); + + m = key_setsadbxsa2(sav->sah->saidx.mode, replay_count, sav->sah->saidx.reqid); if (!m) { error = ENOBUFS; @@ -6954,30 +7185,49 @@ key_expire(struct secasvar *sav) } m_cat(result, m); + if (sav->replay && sav->replay->wsize > UINT8_MAX) { + m = key_setsadbxsareplay(sav->replay->wsize); + if (!m) { + error = ENOBUFS; + goto fail; + } + m_cat(result, m); + } + /* create lifetime extension (current and soft) */ len = PFKEY_ALIGN8(sizeof(*lt)) * 2; - m = key_alloc_mbuf(len); - if (!m || m->m_next) { /*XXX*/ - if (m) - m_freem(m); + m = m_get2(len, M_NOWAIT, MT_DATA, 0); + if (m == NULL) { error = ENOBUFS; goto fail; } + m_align(m, len); + m->m_len = len; bzero(mtod(m, caddr_t), len); lt = mtod(m, struct sadb_lifetime *); lt->sadb_lifetime_len = PFKEY_UNIT64(sizeof(struct sadb_lifetime)); lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT; - lt->sadb_lifetime_allocations = sav->lft_c->allocations; - lt->sadb_lifetime_bytes = sav->lft_c->bytes; - lt->sadb_lifetime_addtime = sav->lft_c->addtime; - lt->sadb_lifetime_usetime = sav->lft_c->usetime; + lt->sadb_lifetime_allocations = + (uint32_t)counter_u64_fetch(sav->lft_c_allocations); + lt->sadb_lifetime_bytes = + counter_u64_fetch(sav->lft_c_bytes); + lt->sadb_lifetime_addtime = sav->created; + lt->sadb_lifetime_usetime = sav->firstused; lt = (struct sadb_lifetime *)(mtod(m, caddr_t) + len / 2); lt->sadb_lifetime_len = PFKEY_UNIT64(sizeof(struct sadb_lifetime)); - lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_SOFT; - lt->sadb_lifetime_allocations = sav->lft_s->allocations; - lt->sadb_lifetime_bytes = sav->lft_s->bytes; - lt->sadb_lifetime_addtime = sav->lft_s->addtime; - lt->sadb_lifetime_usetime = sav->lft_s->usetime; + if (hard) { + lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD; + lt->sadb_lifetime_allocations = sav->lft_h->allocations; + lt->sadb_lifetime_bytes = sav->lft_h->bytes; + lt->sadb_lifetime_addtime = sav->lft_h->addtime; + lt->sadb_lifetime_usetime = 
sav->lft_h->usetime; + } else { + lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_SOFT; + lt->sadb_lifetime_allocations = sav->lft_s->allocations; + lt->sadb_lifetime_bytes = sav->lft_s->bytes; + lt->sadb_lifetime_addtime = sav->lft_s->addtime; + lt->sadb_lifetime_usetime = sav->lft_s->usetime; + } m_cat(result, m); /* set sadb_address for source */ @@ -7002,6 +7252,8 @@ key_expire(struct secasvar *sav) /* * XXX-BZ Handle NAT-T extensions here. + * XXXAE: it doesn't seem quite useful. IKEs should not depend on + * this information, we report only significant SA fields. */ if ((result->m_flags & M_PKTHDR) == 0) { @@ -7024,16 +7276,44 @@ key_expire(struct secasvar *sav) mtod(result, struct sadb_msg *)->sadb_msg_len = PFKEY_UNIT64(result->m_pkthdr.len); - splx(s); return key_sendup_mbuf(NULL, result, KEY_SENDUP_REGISTERED); fail: if (result) m_freem(result); - splx(s); return error; } +static void +key_freesah_flushed(struct secashead_queue *flushq) +{ + struct secashead *sah, *nextsah; + struct secasvar *sav, *nextsav; + + sah = TAILQ_FIRST(flushq); + while (sah != NULL) { + sav = TAILQ_FIRST(&sah->savtree_larval); + while (sav != NULL) { + nextsav = TAILQ_NEXT(sav, chain); + TAILQ_REMOVE(&sah->savtree_larval, sav, chain); + key_freesav(&sav); /* release last reference */ + key_freesah(&sah); /* release reference from SAV */ + sav = nextsav; + } + sav = TAILQ_FIRST(&sah->savtree_alive); + while (sav != NULL) { + nextsav = TAILQ_NEXT(sav, chain); + TAILQ_REMOVE(&sah->savtree_alive, sav, chain); + key_freesav(&sav); /* release last reference */ + key_freesah(&sah); /* release reference from SAV */ + sav = nextsav; + } + nextsah = TAILQ_NEXT(sah, chain); + key_freesah(&sah); /* release last reference */ + sah = nextsah; + } +} + /* * SADB_FLUSH processing * receive @@ -7047,17 +7327,14 @@ key_expire(struct secasvar *sav) * m will always be freed. 
*/ static int -key_flush(so, m, mhp) - struct socket *so; - struct mbuf *m; - const struct sadb_msghdr *mhp; +key_flush(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { + struct secashead_queue flushq; struct sadb_msg *newmsg; struct secashead *sah, *nextsah; - struct secasvar *sav, *nextsav; - u_int16_t proto; - u_int8_t state; - u_int stateidx; + struct secasvar *sav; + uint8_t proto; + int i; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); @@ -7069,37 +7346,71 @@ key_flush(so, m, mhp) __func__)); return key_senderror(so, m, EINVAL); } - - /* no SATYPE specified, i.e. flushing all SA. */ - SAHTREE_LOCK(); - for (sah = LIST_FIRST(&V_sahtree); - sah != NULL; - sah = nextsah) { - nextsah = LIST_NEXT(sah, chain); - - if (mhp->msg->sadb_msg_satype != SADB_SATYPE_UNSPEC - && proto != sah->saidx.proto) - continue; - - for (stateidx = 0; - stateidx < _ARRAYLEN(saorder_state_alive); - stateidx++) { - state = saorder_state_any[stateidx]; - for (sav = LIST_FIRST(&sah->savtree[state]); - sav != NULL; - sav = nextsav) { - - nextsav = LIST_NEXT(sav, chain); - - key_sa_chgstate(sav, SADB_SASTATE_DEAD); - KEY_FREESAV(&sav); + KEYDBG(KEY_STAMP, + printf("%s: proto %u\n", __func__, proto)); + + TAILQ_INIT(&flushq); + if (proto == IPSEC_PROTO_ANY) { + /* no SATYPE specified, i.e. flushing all SA. */ + SAHTREE_WLOCK(); + /* Move all SAHs into flushq */ + TAILQ_CONCAT(&flushq, &V_sahtree, chain); + /* Flush all buckets in SPI hash */ + for (i = 0; i < V_savhash_mask + 1; i++) + LIST_INIT(&V_savhashtbl[i]); + /* Flush all buckets in SAHADDRHASH */ + for (i = 0; i < V_sahaddrhash_mask + 1; i++) + LIST_INIT(&V_sahaddrhashtbl[i]); + /* Mark all SAHs as unlinked */ + TAILQ_FOREACH(sah, &flushq, chain) { + sah->state = SADB_SASTATE_DEAD; + /* + * Callout handler makes its job using + * RLOCK and drain queues. 
In case, when this + * function will be called just before it + * acquires WLOCK, we need to mark SAs as + * unlinked to prevent second unlink. + */ + TAILQ_FOREACH(sav, &sah->savtree_larval, chain) { + sav->state = SADB_SASTATE_DEAD; + } + TAILQ_FOREACH(sav, &sah->savtree_alive, chain) { + sav->state = SADB_SASTATE_DEAD; } } - - sah->state = SADB_SASTATE_DEAD; + SAHTREE_WUNLOCK(); + } else { + SAHTREE_WLOCK(); + sah = TAILQ_FIRST(&V_sahtree); + while (sah != NULL) { + IPSEC_ASSERT(sah->state != SADB_SASTATE_DEAD, + ("DEAD SAH %p in SADB_FLUSH", sah)); + nextsah = TAILQ_NEXT(sah, chain); + if (sah->saidx.proto != proto) { + sah = nextsah; + continue; + } + sah->state = SADB_SASTATE_DEAD; + TAILQ_REMOVE(&V_sahtree, sah, chain); + LIST_REMOVE(sah, addrhash); + /* Unlink all SAs from SPI hash */ + TAILQ_FOREACH(sav, &sah->savtree_larval, chain) { + LIST_REMOVE(sav, spihash); + sav->state = SADB_SASTATE_DEAD; + } + TAILQ_FOREACH(sav, &sah->savtree_alive, chain) { + LIST_REMOVE(sav, spihash); + sav->state = SADB_SASTATE_DEAD; + } + /* Add SAH into flushq */ + TAILQ_INSERT_HEAD(&flushq, sah, chain); + sah = nextsah; + } + SAHTREE_WUNLOCK(); } - SAHTREE_UNLOCK(); + key_freesah_flushed(&flushq); + /* Free all queued SAs and SAHs */ if (m->m_len < sizeof(struct sadb_msg) || sizeof(struct sadb_msg) > m->m_len + M_TRAILINGSPACE(m)) { ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); @@ -7130,20 +7441,15 @@ key_flush(so, m, mhp) * m will always be freed. 
*/ static int -key_dump(so, m, mhp) - struct socket *so; - struct mbuf *m; - const struct sadb_msghdr *mhp; +key_dump(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { + SAHTREE_RLOCK_TRACKER; struct secashead *sah; struct secasvar *sav; - u_int16_t proto; - u_int stateidx; - u_int8_t satype; - u_int8_t state; - int cnt; struct sadb_msg *newmsg; struct mbuf *n; + uint32_t cnt; + uint8_t proto, satype; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(m != NULL, ("null mbuf")); @@ -7153,79 +7459,73 @@ key_dump(so, m, mhp) /* map satype to proto */ if ((proto = key_satype2proto(mhp->msg->sadb_msg_satype)) == 0) { ipseclog((LOG_DEBUG, "%s: invalid satype is passed.\n", - __func__)); + __func__)); return key_senderror(so, m, EINVAL); } /* count sav entries to be sent to the userland. */ cnt = 0; - SAHTREE_LOCK(); - LIST_FOREACH(sah, &V_sahtree, chain) { - if (mhp->msg->sadb_msg_satype != SADB_SATYPE_UNSPEC - && proto != sah->saidx.proto) + SAHTREE_RLOCK(); + TAILQ_FOREACH(sah, &V_sahtree, chain) { + if (mhp->msg->sadb_msg_satype != SADB_SATYPE_UNSPEC && + proto != sah->saidx.proto) continue; - for (stateidx = 0; - stateidx < _ARRAYLEN(saorder_state_any); - stateidx++) { - state = saorder_state_any[stateidx]; - LIST_FOREACH(sav, &sah->savtree[state], chain) { - cnt++; - } - } + TAILQ_FOREACH(sav, &sah->savtree_larval, chain) + cnt++; + TAILQ_FOREACH(sav, &sah->savtree_alive, chain) + cnt++; } if (cnt == 0) { - SAHTREE_UNLOCK(); + SAHTREE_RUNLOCK(); return key_senderror(so, m, ENOENT); } /* send this to the userland, one at a time. 
*/ newmsg = NULL; - LIST_FOREACH(sah, &V_sahtree, chain) { - if (mhp->msg->sadb_msg_satype != SADB_SATYPE_UNSPEC - && proto != sah->saidx.proto) + TAILQ_FOREACH(sah, &V_sahtree, chain) { + if (mhp->msg->sadb_msg_satype != SADB_SATYPE_UNSPEC && + proto != sah->saidx.proto) continue; /* map proto to satype */ if ((satype = key_proto2satype(sah->saidx.proto)) == 0) { - SAHTREE_UNLOCK(); + SAHTREE_RUNLOCK(); ipseclog((LOG_DEBUG, "%s: there was invalid proto in " - "SAD.\n", __func__)); + "SAD.\n", __func__)); return key_senderror(so, m, EINVAL); } - - for (stateidx = 0; - stateidx < _ARRAYLEN(saorder_state_any); - stateidx++) { - state = saorder_state_any[stateidx]; - LIST_FOREACH(sav, &sah->savtree[state], chain) { - n = key_setdumpsa(sav, SADB_DUMP, satype, - --cnt, mhp->msg->sadb_msg_pid); - if (!n) { - SAHTREE_UNLOCK(); - return key_senderror(so, m, ENOBUFS); - } - key_sendup_mbuf(so, n, KEY_SENDUP_ONE); + TAILQ_FOREACH(sav, &sah->savtree_larval, chain) { + n = key_setdumpsa(sav, SADB_DUMP, satype, + --cnt, mhp->msg->sadb_msg_pid); + if (n == NULL) { + SAHTREE_RUNLOCK(); + return key_senderror(so, m, ENOBUFS); } + key_sendup_mbuf(so, n, KEY_SENDUP_ONE); + } + TAILQ_FOREACH(sav, &sah->savtree_alive, chain) { + n = key_setdumpsa(sav, SADB_DUMP, satype, + --cnt, mhp->msg->sadb_msg_pid); + if (n == NULL) { + SAHTREE_RUNLOCK(); + return key_senderror(so, m, ENOBUFS); + } + key_sendup_mbuf(so, n, KEY_SENDUP_ONE); } } - SAHTREE_UNLOCK(); - + SAHTREE_RUNLOCK(); m_freem(m); - return 0; + return (0); } - /* * SADB_X_PROMISC processing * * m will always be freed. 
*/ static int -key_promisc(so, m, mhp) - struct socket *so; - struct mbuf *m; - const struct sadb_msghdr *mhp; +key_promisc(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { int olen; @@ -7272,8 +7572,8 @@ key_promisc(so, m, mhp) } } -static int (*key_typesw[]) __P((struct socket *, struct mbuf *, - const struct sadb_msghdr *)) = { +static int (*key_typesw[])(struct socket *, struct mbuf *, + const struct sadb_msghdr *) = { NULL, /* SADB_RESERVED */ key_getspi, /* SADB_GETSPI */ key_update, /* SADB_UPDATE */ @@ -7311,9 +7611,7 @@ static int (*key_typesw[]) __P((struct socket *, struct mbuf *, * length for buffer to send to user process. */ int -key_parse(m, so) - struct mbuf *m; - struct socket *so; +key_parse(struct mbuf *m, struct socket *so) { struct sadb_msg *msg; struct sadb_msghdr mh; @@ -7324,12 +7622,6 @@ key_parse(m, so) IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(m != NULL, ("null mbuf")); -#if 0 /*kdebug_sadb assumes msg in linear buffer*/ - KEYDEBUG(KEYDEBUG_KEY_DUMP, - ipseclog((LOG_DEBUG, "%s: passed sadb_msg\n", __func__)); - kdebug_sadb(msg)); -#endif - if (m->m_len < sizeof(struct sadb_msg)) { m = m_pullup(m, sizeof(struct sadb_msg)); if (!m) @@ -7339,8 +7631,7 @@ key_parse(m, so) orglen = PFKEY_UNUNIT64(msg->sadb_msg_len); target = KEY_SENDUP_ONE; - if ((m->m_flags & M_PKTHDR) == 0 || - m->m_pkthdr.len != m->m_pkthdr.len) { + if ((m->m_flags & M_PKTHDR) == 0 || m->m_pkthdr.len != orglen) { ipseclog((LOG_DEBUG, "%s: invalid message length.\n",__func__)); PFKEYSTAT_INC(out_invlen); error = EINVAL; @@ -7371,10 +7662,9 @@ key_parse(m, so) if (m->m_next) { struct mbuf *n; - MGETHDR(n, M_DONTWAIT, MT_DATA); + MGETHDR(n, M_NOWAIT, MT_DATA); if (n && m->m_pkthdr.len > MHLEN) { - MCLGET(n, M_DONTWAIT); - if ((n->m_flags & M_EXT) == 0) { + if (!(MCLGET(n, M_NOWAIT))) { m_free(n); n = NULL; } @@ -7397,64 +7687,79 @@ key_parse(m, so) msg = mh.msg; - /* check SA type */ - switch (msg->sadb_msg_satype) { - case SADB_SATYPE_UNSPEC: 
- switch (msg->sadb_msg_type) { - case SADB_GETSPI: - case SADB_UPDATE: - case SADB_ADD: - case SADB_DELETE: - case SADB_GET: - case SADB_ACQUIRE: - case SADB_EXPIRE: - ipseclog((LOG_DEBUG, "%s: must specify satype " - "when msg type=%u.\n", __func__, - msg->sadb_msg_type)); + /* We use satype as scope mask for spddump */ + if (msg->sadb_msg_type == SADB_X_SPDDUMP) { + switch (msg->sadb_msg_satype) { + case IPSEC_POLICYSCOPE_ANY: + case IPSEC_POLICYSCOPE_GLOBAL: + case IPSEC_POLICYSCOPE_IFNET: + case IPSEC_POLICYSCOPE_PCB: + break; + default: + ipseclog((LOG_DEBUG, "%s: illegal satype=%u\n", + __func__, msg->sadb_msg_type)); PFKEYSTAT_INC(out_invsatype); error = EINVAL; goto senderror; } - break; - case SADB_SATYPE_AH: - case SADB_SATYPE_ESP: - case SADB_X_SATYPE_IPCOMP: - case SADB_X_SATYPE_TCPSIGNATURE: - switch (msg->sadb_msg_type) { - case SADB_X_SPDADD: - case SADB_X_SPDDELETE: - case SADB_X_SPDGET: - case SADB_X_SPDDUMP: - case SADB_X_SPDFLUSH: - case SADB_X_SPDSETIDX: - case SADB_X_SPDUPDATE: - case SADB_X_SPDDELETE2: - ipseclog((LOG_DEBUG, "%s: illegal satype=%u\n", - __func__, msg->sadb_msg_type)); + } else { + switch (msg->sadb_msg_satype) { /* check SA type */ + case SADB_SATYPE_UNSPEC: + switch (msg->sadb_msg_type) { + case SADB_GETSPI: + case SADB_UPDATE: + case SADB_ADD: + case SADB_DELETE: + case SADB_GET: + case SADB_ACQUIRE: + case SADB_EXPIRE: + ipseclog((LOG_DEBUG, "%s: must specify satype " + "when msg type=%u.\n", __func__, + msg->sadb_msg_type)); + PFKEYSTAT_INC(out_invsatype); + error = EINVAL; + goto senderror; + } + break; + case SADB_SATYPE_AH: + case SADB_SATYPE_ESP: + case SADB_X_SATYPE_IPCOMP: + case SADB_X_SATYPE_TCPSIGNATURE: + switch (msg->sadb_msg_type) { + case SADB_X_SPDADD: + case SADB_X_SPDDELETE: + case SADB_X_SPDGET: + case SADB_X_SPDFLUSH: + case SADB_X_SPDSETIDX: + case SADB_X_SPDUPDATE: + case SADB_X_SPDDELETE2: + ipseclog((LOG_DEBUG, "%s: illegal satype=%u\n", + __func__, msg->sadb_msg_type)); + 
PFKEYSTAT_INC(out_invsatype); + error = EINVAL; + goto senderror; + } + break; + case SADB_SATYPE_RSVP: + case SADB_SATYPE_OSPFV2: + case SADB_SATYPE_RIPV2: + case SADB_SATYPE_MIP: + ipseclog((LOG_DEBUG, "%s: type %u isn't supported.\n", + __func__, msg->sadb_msg_satype)); + PFKEYSTAT_INC(out_invsatype); + error = EOPNOTSUPP; + goto senderror; + case 1: /* XXX: What does it do? */ + if (msg->sadb_msg_type == SADB_X_PROMISC) + break; + /*FALLTHROUGH*/ + default: + ipseclog((LOG_DEBUG, "%s: invalid type %u is passed.\n", + __func__, msg->sadb_msg_satype)); PFKEYSTAT_INC(out_invsatype); error = EINVAL; goto senderror; } - break; - case SADB_SATYPE_RSVP: - case SADB_SATYPE_OSPFV2: - case SADB_SATYPE_RIPV2: - case SADB_SATYPE_MIP: - ipseclog((LOG_DEBUG, "%s: type %u isn't supported.\n", - __func__, msg->sadb_msg_satype)); - PFKEYSTAT_INC(out_invsatype); - error = EOPNOTSUPP; - goto senderror; - case 1: /* XXX: What does it do? */ - if (msg->sadb_msg_type == SADB_X_PROMISC) - break; - /*FALLTHROUGH*/ - default: - ipseclog((LOG_DEBUG, "%s: invalid type %u is passed.\n", - __func__, msg->sadb_msg_satype)); - PFKEYSTAT_INC(out_invsatype); - error = EINVAL; - goto senderror; } /* check field of upper layer protocol and address family */ @@ -7546,7 +7851,7 @@ key_parse(m, so) */ } - if (msg->sadb_msg_type >= sizeof(key_typesw)/sizeof(key_typesw[0]) || + if (msg->sadb_msg_type >= nitems(key_typesw) || key_typesw[msg->sadb_msg_type] == NULL) { PFKEYSTAT_INC(out_invmsgtype); error = EINVAL; @@ -7561,10 +7866,7 @@ senderror: } static int -key_senderror(so, m, code) - struct socket *so; - struct mbuf *m; - int code; +key_senderror(struct socket *so, struct mbuf *m, int code) { struct sadb_msg *msg; @@ -7582,9 +7884,7 @@ key_senderror(so, m, code) * XXX larger-than-MCLBYTES extension? 
*/ static int -key_align(m, mhp) - struct mbuf *m; - struct sadb_msghdr *mhp; +key_align(struct mbuf *m, struct sadb_msghdr *mhp) { struct mbuf *n; struct sadb_ext *ext; @@ -7633,14 +7933,15 @@ key_align(m, mhp) case SADB_EXT_SPIRANGE: case SADB_X_EXT_POLICY: case SADB_X_EXT_SA2: -#ifdef IPSEC_NAT_T case SADB_X_EXT_NAT_T_TYPE: case SADB_X_EXT_NAT_T_SPORT: case SADB_X_EXT_NAT_T_DPORT: case SADB_X_EXT_NAT_T_OAI: case SADB_X_EXT_NAT_T_OAR: case SADB_X_EXT_NAT_T_FRAG: -#endif + case SADB_X_EXT_SA_REPLAY: + case SADB_X_EXT_NEW_ADDRESS_SRC: + case SADB_X_EXT_NEW_ADDRESS_DST: /* duplicate check */ /* * XXX Are there duplication payloads of either @@ -7692,9 +7993,7 @@ key_align(m, mhp) } static int -key_validate_ext(ext, len) - const struct sadb_ext *ext; - int len; +key_validate_ext(const struct sadb_ext *ext, int len) { const struct sockaddr *sa; enum { NONE, ADDR } checktype = NONE; @@ -7705,8 +8004,8 @@ key_validate_ext(ext, len) return EINVAL; /* if it does not match minimum/maximum length, bail */ - if (ext->sadb_ext_type >= sizeof(minsize) / sizeof(minsize[0]) || - ext->sadb_ext_type >= sizeof(maxsize) / sizeof(maxsize[0])) + if (ext->sadb_ext_type >= nitems(minsize) || + ext->sadb_ext_type >= nitems(maxsize)) return EINVAL; if (!minsize[ext->sadb_ext_type] || len < minsize[ext->sadb_ext_type]) return EINVAL; @@ -7718,6 +8017,10 @@ key_validate_ext(ext, len) case SADB_EXT_ADDRESS_SRC: case SADB_EXT_ADDRESS_DST: case SADB_EXT_ADDRESS_PROXY: + case SADB_X_EXT_NAT_T_OAI: + case SADB_X_EXT_NAT_T_OAR: + case SADB_X_EXT_NEW_ADDRESS_SRC: + case SADB_X_EXT_NEW_ADDRESS_DST: baselen = PFKEY_ALIGN8(sizeof(struct sadb_address)); checktype = ADDR; break; @@ -7755,10 +8058,24 @@ key_init(void) { int i; - for (i = 0; i < IPSEC_DIR_MAX; i++) - LIST_INIT(&V_sptree[i]); + for (i = 0; i < IPSEC_DIR_MAX; i++) { + TAILQ_INIT(&V_sptree[i]); + TAILQ_INIT(&V_sptree_ifnet[i]); + } + + V_key_lft_zone = uma_zcreate("IPsec SA lft_c", + sizeof(uint64_t) * 2, NULL, NULL, NULL, NULL, + 
UMA_ALIGN_PTR, UMA_ZONE_PCPU); - LIST_INIT(&V_sahtree); + TAILQ_INIT(&V_sahtree); + V_sphashtbl = hashinit(SPHASH_NHASH, M_IPSEC_SP, &V_sphash_mask); + V_savhashtbl = hashinit(SAVHASH_NHASH, M_IPSEC_SA, &V_savhash_mask); + V_sahaddrhashtbl = hashinit(SAHHASH_NHASH, M_IPSEC_SAH, + &V_sahaddrhash_mask); + V_acqaddrhashtbl = hashinit(ACQHASH_NHASH, M_IPSEC_SAQ, + &V_acqaddrhash_mask); + V_acqseqhashtbl = hashinit(ACQHASH_NHASH, M_IPSEC_SAQ, + &V_acqseqhash_mask); for (i = 0; i <= SADB_SATYPE_MAX; i++) LIST_INIT(&V_regtree[i]); @@ -7766,13 +8083,10 @@ key_init(void) LIST_INIT(&V_acqtree); LIST_INIT(&V_spacqtree); - /* system default */ - V_ip4_def_policy.policy = IPSEC_POLICY_NONE; - V_ip4_def_policy.refcnt++; /*never reclaim this*/ - if (!IS_DEFAULT_VNET(curvnet)) return; + XFORMS_LOCK_INIT(); SPTREE_LOCK_INIT(); REGTREE_LOCK_INIT(); SAHTREE_LOCK_INIT(); @@ -7780,48 +8094,71 @@ key_init(void) SPACQ_LOCK_INIT(); #ifndef IPSEC_DEBUG2 - timeout((void *)key_timehandler, (void *)0, hz); + callout_init(&key_timer, 1); + callout_reset(&key_timer, hz, key_timehandler, NULL); #endif /*IPSEC_DEBUG2*/ /* initialize key statistics */ keystat.getspi_count = 1; - printf("IPsec: Initialized Security Association Processing.\n"); + if (bootverbose) + printf("IPsec: Initialized Security Association Processing.\n"); } #ifdef VIMAGE void key_destroy(void) { + struct secashead_queue sahdrainq; + struct secpolicy_queue drainq; struct secpolicy *sp, *nextsp; struct secacq *acq, *nextacq; struct secspacq *spacq, *nextspacq; - struct secashead *sah, *nextsah; + struct secashead *sah; + struct secasvar *sav; struct secreg *reg; int i; - SPTREE_LOCK(); + /* + * XXX: can we just call free() for each object without + * walking through safe way with releasing references? 
+ */ + TAILQ_INIT(&drainq); + SPTREE_WLOCK(); for (i = 0; i < IPSEC_DIR_MAX; i++) { - for (sp = LIST_FIRST(&V_sptree[i]); - sp != NULL; sp = nextsp) { - nextsp = LIST_NEXT(sp, chain); - if (__LIST_CHAINED(sp)) { - LIST_REMOVE(sp, chain); - free(sp, M_IPSEC_SP); - } + TAILQ_CONCAT(&drainq, &V_sptree[i], chain); + TAILQ_CONCAT(&drainq, &V_sptree_ifnet[i], chain); + } + SPTREE_WUNLOCK(); + sp = TAILQ_FIRST(&drainq); + while (sp != NULL) { + nextsp = TAILQ_NEXT(sp, chain); + key_freesp(&sp); + sp = nextsp; + } + + TAILQ_INIT(&sahdrainq); + SAHTREE_WLOCK(); + TAILQ_CONCAT(&sahdrainq, &V_sahtree, chain); + for (i = 0; i < V_savhash_mask + 1; i++) + LIST_INIT(&V_savhashtbl[i]); + for (i = 0; i < V_sahaddrhash_mask + 1; i++) + LIST_INIT(&V_sahaddrhashtbl[i]); + TAILQ_FOREACH(sah, &sahdrainq, chain) { + sah->state = SADB_SASTATE_DEAD; + TAILQ_FOREACH(sav, &sah->savtree_larval, chain) { + sav->state = SADB_SASTATE_DEAD; } - } - SPTREE_UNLOCK(); - - SAHTREE_LOCK(); - for (sah = LIST_FIRST(&V_sahtree); sah != NULL; sah = nextsah) { - nextsah = LIST_NEXT(sah, chain); - if (__LIST_CHAINED(sah)) { - LIST_REMOVE(sah, chain); - free(sah, M_IPSEC_SAH); + TAILQ_FOREACH(sav, &sah->savtree_alive, chain) { + sav->state = SADB_SASTATE_DEAD; } } - SAHTREE_UNLOCK(); + SAHTREE_WUNLOCK(); + + key_freesah_flushed(&sahdrainq); + hashdestroy(V_sphashtbl, M_IPSEC_SP, V_sphash_mask); + hashdestroy(V_savhashtbl, M_IPSEC_SA, V_savhash_mask); + hashdestroy(V_sahaddrhashtbl, M_IPSEC_SAH, V_sahaddrhash_mask); REGTREE_LOCK(); for (i = 0; i <= SADB_SATYPE_MAX; i++) { @@ -7836,12 +8173,12 @@ key_destroy(void) REGTREE_UNLOCK(); ACQ_LOCK(); - for (acq = LIST_FIRST(&V_acqtree); acq != NULL; acq = nextacq) { + acq = LIST_FIRST(&V_acqtree); + while (acq != NULL) { nextacq = LIST_NEXT(acq, chain); - if (__LIST_CHAINED(acq)) { - LIST_REMOVE(acq, chain); - free(acq, M_IPSEC_SAQ); - } + LIST_REMOVE(acq, chain); + free(acq, M_IPSEC_SAQ); + acq = nextacq; } ACQ_UNLOCK(); @@ -7855,56 +8192,31 @@ key_destroy(void) } 
} SPACQ_UNLOCK(); + hashdestroy(V_acqaddrhashtbl, M_IPSEC_SAQ, V_acqaddrhash_mask); + hashdestroy(V_acqseqhashtbl, M_IPSEC_SAQ, V_acqseqhash_mask); + uma_zdestroy(V_key_lft_zone); } #endif -/* - * XXX: maybe This function is called after INBOUND IPsec processing. - * - * Special check for tunnel-mode packets. - * We must make some checks for consistency between inner and outer IP header. - * - * xxx more checks to be provided - */ -int -key_checktunnelsanity(sav, family, src, dst) - struct secasvar *sav; - u_int family; - caddr_t src; - caddr_t dst; -{ - IPSEC_ASSERT(sav->sah != NULL, ("null SA header")); - - /* XXX: check inner IP header */ - - return 1; -} - /* record data transfer on SA, and update timestamps */ void -key_sa_recordxfer(sav, m) - struct secasvar *sav; - struct mbuf *m; +key_sa_recordxfer(struct secasvar *sav, struct mbuf *m) { IPSEC_ASSERT(sav != NULL, ("Null secasvar")); IPSEC_ASSERT(m != NULL, ("Null mbuf")); - if (!sav->lft_c) - return; /* * XXX Currently, there is a difference of bytes size * between inbound and outbound processing. */ - sav->lft_c->bytes += m->m_pkthdr.len; - /* to check bytes lifetime is done in key_timehandler(). */ + counter_u64_add(sav->lft_c_bytes, m->m_pkthdr.len); /* * We use the number of packets as the unit of * allocations. We increment the variable * whenever {esp,ah}_{in,out}put is called. */ - sav->lft_c->allocations++; - /* XXX check for expires? */ + counter_u64_add(sav->lft_c_allocations, 1); /* * NOTE: We record CURRENT usetime by using wall clock, @@ -7917,92 +8229,8 @@ key_sa_recordxfer(sav, m) * <--------------> HARD * <-----> SOFT */ - sav->lft_c->usetime = time_second; - /* XXX check for expires? 
*/ - - return; -} - -/* dumb version */ -void -key_sa_routechange(dst) - struct sockaddr *dst; -{ - struct secashead *sah; - struct route *ro; - - SAHTREE_LOCK(); - LIST_FOREACH(sah, &V_sahtree, chain) { - ro = &sah->route_cache.sa_route; - if (ro->ro_rt && dst->sa_len == ro->ro_dst.sa_len - && bcmp(dst, &ro->ro_dst, dst->sa_len) == 0) { - RTFREE(ro->ro_rt); - ro->ro_rt = (struct rtentry *)NULL; - } - } - SAHTREE_UNLOCK(); -} - -static void -key_sa_chgstate(struct secasvar *sav, u_int8_t state) -{ - IPSEC_ASSERT(sav != NULL, ("NULL sav")); - SAHTREE_LOCK_ASSERT(); - - if (sav->state != state) { - if (__LIST_CHAINED(sav)) - LIST_REMOVE(sav, chain); - sav->state = state; - LIST_INSERT_HEAD(&sav->sah->savtree[state], sav, chain); - } -} - -void -key_sa_stir_iv(sav) - struct secasvar *sav; -{ - - IPSEC_ASSERT(sav->iv != NULL, ("null IV")); - key_randomfill(sav->iv, sav->ivlen); -} - -/* XXX too much? */ -static struct mbuf * -key_alloc_mbuf(l) - int l; -{ - struct mbuf *m = NULL, *n; - int len, t; - - len = l; - while (len > 0) { - MGET(n, M_DONTWAIT, MT_DATA); - if (n && len > MLEN) - MCLGET(n, M_DONTWAIT); - if (!n) { - m_freem(m); - return NULL; - } - - n->m_next = NULL; - n->m_len = 0; - n->m_len = M_TRAILINGSPACE(n); - /* use the bottom of mbuf, hoping we can prepend afterwards */ - if (n->m_len > len) { - t = (n->m_len - len) & ~(sizeof(long) - 1); - n->m_data += t; - n->m_len = len; - } - - len -= n->m_len; - - if (m) - m_cat(m, n); - else - m = n; - } - - return m; + if (sav->firstused == 0) + sav->firstused = time_second; } /* @@ -8019,7 +8247,7 @@ key_alloc_mbuf(l) */ static struct mbuf * -key_setkey(struct seckey *src, u_int16_t exttype) +key_setkey(struct seckey *src, uint16_t exttype) { struct mbuf *m; struct sadb_key *p; @@ -8029,9 +8257,11 @@ key_setkey(struct seckey *src, u_int16_t exttype) return NULL; len = PFKEY_ALIGN8(sizeof(struct sadb_key) + _KEYLEN(src)); - m = key_alloc_mbuf(len); + m = m_get2(len, M_NOWAIT, MT_DATA, 0); if (m == NULL) return 
NULL; + m_align(m, len); + m->m_len = len; p = mtod(m, struct sadb_key *); bzero(p, len); p->sadb_key_len = PFKEY_UNIT64(len); @@ -8057,7 +8287,7 @@ key_setkey(struct seckey *src, u_int16_t exttype) */ static struct mbuf * -key_setlifetime(struct seclifetime *src, u_int16_t exttype) +key_setlifetime(struct seclifetime *src, uint16_t exttype) { struct mbuf *m = NULL; struct sadb_lifetime *p; @@ -8066,9 +8296,11 @@ key_setlifetime(struct seclifetime *src, u_int16_t exttype) if (src == NULL) return NULL; - m = key_alloc_mbuf(len); + m = m_get2(len, M_NOWAIT, MT_DATA, 0); if (m == NULL) return m; + m_align(m, len); + m->m_len = len; p = mtod(m, struct sadb_lifetime *); bzero(p, len); @@ -8082,3 +8314,104 @@ key_setlifetime(struct seclifetime *src, u_int16_t exttype) return m; } + +const struct enc_xform * +enc_algorithm_lookup(int alg) +{ + int i; + + for (i = 0; i < nitems(supported_ealgs); i++) + if (alg == supported_ealgs[i].sadb_alg) + return (supported_ealgs[i].xform); + return (NULL); +} + +const struct auth_hash * +auth_algorithm_lookup(int alg) +{ + int i; + + for (i = 0; i < nitems(supported_aalgs); i++) + if (alg == supported_aalgs[i].sadb_alg) + return (supported_aalgs[i].xform); + return (NULL); +} + +const struct comp_algo * +comp_algorithm_lookup(int alg) +{ + int i; + + for (i = 0; i < nitems(supported_calgs); i++) + if (alg == supported_calgs[i].sadb_alg) + return (supported_calgs[i].xform); + return (NULL); +} + +/* + * Register a transform. 
+ */ +static int +xform_register(struct xformsw* xsp) +{ + struct xformsw *entry; + + XFORMS_LOCK(); + LIST_FOREACH(entry, &xforms, chain) { + if (entry->xf_type == xsp->xf_type) { + XFORMS_UNLOCK(); + return (EEXIST); + } + } + LIST_INSERT_HEAD(&xforms, xsp, chain); + XFORMS_UNLOCK(); + return (0); +} + +void +xform_attach(void *data) +{ + struct xformsw *xsp = (struct xformsw *)data; + + if (xform_register(xsp) != 0) + printf("%s: failed to register %s xform\n", __func__, + xsp->xf_name); +} + +void +xform_detach(void *data) +{ + struct xformsw *xsp = (struct xformsw *)data; + + XFORMS_LOCK(); + LIST_REMOVE(xsp, chain); + XFORMS_UNLOCK(); + + /* Delete all SAs related to this xform. */ + key_delete_xform(xsp); +} + +/* + * Initialize transform support in an sav. + */ +static int +xform_init(struct secasvar *sav, u_short xftype) +{ + struct xformsw *entry; + int ret; + + IPSEC_ASSERT(sav->tdb_xform == NULL, + ("tdb_xform is already initialized")); + + ret = EINVAL; + XFORMS_LOCK(); + LIST_FOREACH(entry, &xforms, chain) { + if (entry->xf_type == xftype) { + ret = (*entry->xf_init)(sav, entry); + break; + } + } + XFORMS_UNLOCK(); + return (ret); +} + diff --git a/freebsd/sys/netipsec/key.h b/freebsd/sys/netipsec/key.h index f246dbcf..a646832e 100644 --- a/freebsd/sys/netipsec/key.h +++ b/freebsd/sys/netipsec/key.h @@ -37,7 +37,6 @@ struct secpolicy; struct secpolicyindex; -struct ipsecrequest; struct secasvar; struct sockaddr; struct socket; @@ -46,74 +45,44 @@ struct sadb_x_policy; struct secasindex; union sockaddr_union; -extern void key_addref(struct secpolicy *sp); -extern int key_havesp(u_int dir); -extern struct secpolicy *key_allocsp(struct secpolicyindex *, u_int, - const char*, int); -extern struct secpolicy *key_allocsp2(u_int32_t spi, union sockaddr_union *dst, - u_int8_t proto, u_int dir, const char*, int); -extern struct secpolicy *key_newsp(const char*, int); -#if 0 -extern struct secpolicy *key_gettunnel(const struct sockaddr *, - const struct 
sockaddr *, const struct sockaddr *, - const struct sockaddr *, const char*, int); -#endif -/* NB: prepend with _ for KAME IPv6 compatbility */ -extern void _key_freesp(struct secpolicy **, const char*, int); - -#define KEY_ALLOCSP(spidx, dir) \ - key_allocsp(spidx, dir, __FILE__, __LINE__) -#define KEY_ALLOCSP2(spi, dst, proto, dir) \ - key_allocsp2(spi, dst, proto, dir, __FILE__, __LINE__) -#define KEY_NEWSP() \ - key_newsp(__FILE__, __LINE__) -#if 0 -#define KEY_GETTUNNEL(osrc, odst, isrc, idst) \ - key_gettunnel(osrc, odst, isrc, idst, __FILE__, __LINE__) -#endif -#define KEY_FREESP(spp) \ - _key_freesp(spp, __FILE__, __LINE__) +struct secpolicy *key_newsp(void); +struct secpolicy *key_allocsp(struct secpolicyindex *, u_int); +struct secpolicy *key_msg2sp(struct sadb_x_policy *, size_t, int *); +int key_sp2msg(struct secpolicy *, void *, size_t *); +void key_addref(struct secpolicy *); +void key_freesp(struct secpolicy **); +int key_spdacquire(struct secpolicy *); +int key_havesp(u_int); +void key_bumpspgen(void); +uint32_t key_getspgen(void); +uint32_t key_newreqid(void); -extern struct secasvar *key_allocsa(union sockaddr_union *, u_int, u_int32_t, - const char*, int); -extern void key_addrefsa(struct secasvar *, const char*, int); -extern void key_freesav(struct secasvar **, const char*, int); +struct secasvar *key_allocsa(union sockaddr_union *, uint8_t, uint32_t); +struct secasvar *key_allocsa_tunnel(union sockaddr_union *, + union sockaddr_union *, uint8_t); +struct secasvar *key_allocsa_policy(struct secpolicy *, + const struct secasindex *, int *); +struct secasvar *key_allocsa_tcpmd5(struct secasindex *); +void key_freesav(struct secasvar **); -#define KEY_ALLOCSA(dst, proto, spi) \ - key_allocsa(dst, proto, spi, __FILE__, __LINE__) -#define KEY_ADDREFSA(sav) \ - key_addrefsa(sav, __FILE__, __LINE__) -#define KEY_FREESAV(psav) \ - key_freesav(psav, __FILE__, __LINE__) +int key_sockaddrcmp(const struct sockaddr *, const struct sockaddr *, int); +int 
key_sockaddrcmp_withmask(const struct sockaddr *, const struct sockaddr *, + size_t); -extern void key_freeso __P((struct socket *)); -extern int key_checktunnelsanity __P((struct secasvar *, u_int, - caddr_t, caddr_t)); -extern int key_checkrequest - __P((struct ipsecrequest *isr, const struct secasindex *)); +int key_register_ifnet(struct secpolicy **, u_int); +void key_unregister_ifnet(struct secpolicy **, u_int); -extern struct secpolicy *key_msg2sp __P((struct sadb_x_policy *, - size_t, int *)); -extern struct mbuf *key_sp2msg __P((struct secpolicy *)); -extern int key_ismyaddr __P((struct sockaddr *)); -extern int key_spdacquire __P((struct secpolicy *)); -extern void key_timehandler __P((void)); -extern u_long key_random __P((void)); -extern void key_randomfill __P((void *, size_t)); -extern void key_freereg __P((struct socket *)); -extern int key_parse __P((struct mbuf *, struct socket *)); -extern void key_init __P((void)); +extern u_long key_random(void); +extern void key_randomfill(void *, size_t); +extern void key_freereg(struct socket *); +extern int key_parse(struct mbuf *, struct socket *); +extern void key_init(void); #ifdef VIMAGE extern void key_destroy(void); #endif -extern void key_sa_recordxfer __P((struct secasvar *, struct mbuf *)); -extern void key_sa_routechange __P((struct sockaddr *)); -extern void key_sa_stir_iv __P((struct secasvar *)); -#ifdef IPSEC_NAT_T -u_int16_t key_portfromsaddr(struct sockaddr *); -#define KEY_PORTFROMSADDR(saddr) \ - key_portfromsaddr((struct sockaddr *)(saddr)) -#endif +extern void key_sa_recordxfer(struct secasvar *, struct mbuf *); +uint16_t key_portfromsaddr(struct sockaddr *); +void key_porttosaddr(struct sockaddr *, uint16_t port); #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_IPSEC_SA); diff --git a/freebsd/sys/netipsec/key_debug.c b/freebsd/sys/netipsec/key_debug.c index 0b54ffab..51ac1fdc 100644 --- a/freebsd/sys/netipsec/key_debug.c +++ b/freebsd/sys/netipsec/key_debug.c @@ -38,16 +38,17 @@ #include #endif 
-#include #include #ifdef _KERNEL #include +#include +#include #include +#include #include #endif #include -#include #include #include @@ -57,6 +58,7 @@ #include #ifdef _KERNEL #include +#include #endif #ifndef _KERNEL @@ -65,17 +67,17 @@ #include #endif /* !_KERNEL */ -static void kdebug_sadb_prop __P((struct sadb_ext *)); -static void kdebug_sadb_identity __P((struct sadb_ext *)); -static void kdebug_sadb_supported __P((struct sadb_ext *)); -static void kdebug_sadb_lifetime __P((struct sadb_ext *)); -static void kdebug_sadb_sa __P((struct sadb_ext *)); -static void kdebug_sadb_address __P((struct sadb_ext *)); -static void kdebug_sadb_key __P((struct sadb_ext *)); -static void kdebug_sadb_x_sa2 __P((struct sadb_ext *)); +static void kdebug_sadb_prop(struct sadb_ext *); +static void kdebug_sadb_identity(struct sadb_ext *); +static void kdebug_sadb_supported(struct sadb_ext *); +static void kdebug_sadb_lifetime(struct sadb_ext *); +static void kdebug_sadb_sa(struct sadb_ext *); +static void kdebug_sadb_address(struct sadb_ext *); +static void kdebug_sadb_key(struct sadb_ext *); +static void kdebug_sadb_x_sa2(struct sadb_ext *); #ifdef _KERNEL -static void kdebug_secreplay __P((struct secreplay *)); +static void kdebug_secreplay(struct secreplay *); #endif #ifndef _KERNEL @@ -86,8 +88,7 @@ static void kdebug_secreplay __P((struct secreplay *)); /* %%%: about struct sadb_msg */ void -kdebug_sadb(base) - struct sadb_msg *base; +kdebug_sadb(struct sadb_msg *base) { struct sadb_ext *ext; int tlen, extlen; @@ -175,8 +176,7 @@ kdebug_sadb(base) } static void -kdebug_sadb_prop(ext) - struct sadb_ext *ext; +kdebug_sadb_prop(struct sadb_ext *ext) { struct sadb_prop *prop = (struct sadb_prop *)ext; struct sadb_comb *comb; @@ -225,8 +225,7 @@ kdebug_sadb_prop(ext) } static void -kdebug_sadb_identity(ext) - struct sadb_ext *ext; +kdebug_sadb_identity(struct sadb_ext *ext) { struct sadb_ident *id = (struct sadb_ident *)ext; int len; @@ -268,8 +267,7 @@ kdebug_sadb_identity(ext) 
} static void -kdebug_sadb_supported(ext) - struct sadb_ext *ext; +kdebug_sadb_supported(struct sadb_ext *ext) { struct sadb_supported *sup = (struct sadb_supported *)ext; struct sadb_alg *alg; @@ -295,8 +293,7 @@ kdebug_sadb_supported(ext) } static void -kdebug_sadb_lifetime(ext) - struct sadb_ext *ext; +kdebug_sadb_lifetime(struct sadb_ext *ext) { struct sadb_lifetime *lft = (struct sadb_lifetime *)ext; @@ -315,8 +312,7 @@ kdebug_sadb_lifetime(ext) } static void -kdebug_sadb_sa(ext) - struct sadb_ext *ext; +kdebug_sadb_sa(struct sadb_ext *ext) { struct sadb_sa *sa = (struct sadb_sa *)ext; @@ -334,8 +330,7 @@ kdebug_sadb_sa(ext) } static void -kdebug_sadb_address(ext) - struct sadb_ext *ext; +kdebug_sadb_address(struct sadb_ext *ext) { struct sadb_address *addr = (struct sadb_address *)ext; @@ -354,8 +349,7 @@ kdebug_sadb_address(ext) } static void -kdebug_sadb_key(ext) - struct sadb_ext *ext; +kdebug_sadb_key(struct sadb_ext *ext) { struct sadb_key *key = (struct sadb_key *)ext; @@ -383,8 +377,7 @@ kdebug_sadb_key(ext) } static void -kdebug_sadb_x_sa2(ext) - struct sadb_ext *ext; +kdebug_sadb_x_sa2(struct sadb_ext *ext) { struct sadb_x_sa2 *sa2 = (struct sadb_x_sa2 *)ext; @@ -402,8 +395,7 @@ kdebug_sadb_x_sa2(ext) } void -kdebug_sadb_x_policy(ext) - struct sadb_ext *ext; +kdebug_sadb_x_policy(struct sadb_ext *ext) { struct sadb_x_policy *xpl = (struct sadb_x_policy *)ext; struct sockaddr *addr; @@ -469,185 +461,304 @@ kdebug_sadb_x_policy(ext) #ifdef _KERNEL /* %%%: about SPD and SAD */ -void -kdebug_secpolicy(sp) - struct secpolicy *sp; +const char* +kdebug_secpolicy_state(u_int state) { - /* sanity check */ - if (sp == NULL) - panic("%s: NULL pointer was passed.\n", __func__); - printf("secpolicy{ refcnt=%u state=%u policy=%u\n", - sp->refcnt, sp->state, sp->policy); + switch (state) { + case IPSEC_SPSTATE_DEAD: + return ("dead"); + case IPSEC_SPSTATE_LARVAL: + return ("larval"); + case IPSEC_SPSTATE_ALIVE: + return ("alive"); + case IPSEC_SPSTATE_PCB: + return 
("pcb"); + case IPSEC_SPSTATE_IFNET: + return ("ifnet"); + } + return ("unknown"); +} - kdebug_secpolicyindex(&sp->spidx); +const char* +kdebug_secpolicy_policy(u_int policy) +{ - switch (sp->policy) { + switch (policy) { case IPSEC_POLICY_DISCARD: - printf(" type=discard }\n"); - break; + return ("discard"); case IPSEC_POLICY_NONE: - printf(" type=none }\n"); - break; + return ("none"); case IPSEC_POLICY_IPSEC: - { - struct ipsecrequest *isr; - for (isr = sp->req; isr != NULL; isr = isr->next) { - - printf(" level=%u\n", isr->level); - kdebug_secasindex(&isr->saidx); - - if (isr->sav != NULL) - kdebug_secasv(isr->sav); - } - printf(" }\n"); - } - break; - case IPSEC_POLICY_BYPASS: - printf(" type=bypass }\n"); - break; + return ("ipsec"); case IPSEC_POLICY_ENTRUST: - printf(" type=entrust }\n"); - break; - default: - printf("%s: Invalid policy found. %d\n", __func__, sp->policy); - break; + return ("entrust"); + case IPSEC_POLICY_BYPASS: + return ("bypass"); } - - return; + return ("unknown"); } -void -kdebug_secpolicyindex(spidx) - struct secpolicyindex *spidx; +const char* +kdebug_secpolicyindex_dir(u_int dir) { - /* sanity check */ - if (spidx == NULL) - panic("%s: NULL pointer was passed.\n", __func__); - printf("secpolicyindex{ dir=%u prefs=%u prefd=%u ul_proto=%u\n", - spidx->dir, spidx->prefs, spidx->prefd, spidx->ul_proto); + switch (dir) { + case IPSEC_DIR_ANY: + return ("any"); + case IPSEC_DIR_INBOUND: + return ("in"); + case IPSEC_DIR_OUTBOUND: + return ("out"); + } + return ("unknown"); +} - ipsec_hexdump((caddr_t)&spidx->src, - ((struct sockaddr *)&spidx->src)->sa_len); - printf("\n"); - ipsec_hexdump((caddr_t)&spidx->dst, - ((struct sockaddr *)&spidx->dst)->sa_len); - printf("}\n"); +const char* +kdebug_ipsecrequest_level(u_int level) +{ - return; + switch (level) { + case IPSEC_LEVEL_DEFAULT: + return ("default"); + case IPSEC_LEVEL_USE: + return ("use"); + case IPSEC_LEVEL_REQUIRE: + return ("require"); + case IPSEC_LEVEL_UNIQUE: + return 
("unique"); + } + return ("unknown"); } -void -kdebug_secasindex(saidx) - struct secasindex *saidx; +const char* +kdebug_secasindex_mode(u_int mode) { - /* sanity check */ - if (saidx == NULL) - panic("%s: NULL pointer was passed.\n", __func__); - printf("secasindex{ mode=%u proto=%u\n", - saidx->mode, saidx->proto); + switch (mode) { + case IPSEC_MODE_ANY: + return ("any"); + case IPSEC_MODE_TRANSPORT: + return ("transport"); + case IPSEC_MODE_TUNNEL: + return ("tunnel"); + case IPSEC_MODE_TCPMD5: + return ("tcp-md5"); + } + return ("unknown"); +} - ipsec_hexdump((caddr_t)&saidx->src, - ((struct sockaddr *)&saidx->src)->sa_len); - printf("\n"); - ipsec_hexdump((caddr_t)&saidx->dst, - ((struct sockaddr *)&saidx->dst)->sa_len); - printf("\n"); +const char* +kdebug_secasv_state(u_int state) +{ - return; + switch (state) { + case SADB_SASTATE_LARVAL: + return ("larval"); + case SADB_SASTATE_MATURE: + return ("mature"); + case SADB_SASTATE_DYING: + return ("dying"); + case SADB_SASTATE_DEAD: + return ("dead"); + } + return ("unknown"); } -static void -kdebug_sec_lifetime(struct seclifetime *lft) +static char* +kdebug_port2str(const struct sockaddr *sa, char *buf, size_t len) { - /* sanity check */ - if (lft == NULL) - panic("%s: NULL pointer was passed.\n", __func__); - - printf("sec_lifetime{ alloc=%u, bytes=%u\n", - lft->allocations, (u_int32_t)lft->bytes); - printf(" addtime=%u, usetime=%u }\n", - (u_int32_t)lft->addtime, (u_int32_t)lft->usetime); + uint16_t port; - return; + IPSEC_ASSERT(sa != NULL, ("null sa")); + switch (sa->sa_family) { +#ifdef INET + case AF_INET: + port = ntohs(((const struct sockaddr_in *)sa)->sin_port); + break; +#endif +#ifdef INET6 + case AF_INET6: + port = ntohs(((const struct sockaddr_in6 *)sa)->sin6_port); + break; +#endif + default: + port = 0; + } + if (port == 0) + return ("*"); + snprintf(buf, len, "%u", port); + return (buf); } void -kdebug_secasv(sav) - struct secasvar *sav; +kdebug_secpolicy(struct secpolicy *sp) { - /* sanity 
check */ - if (sav == NULL) - panic("%s: NULL pointer was passed.\n", __func__); + u_int idx; + + IPSEC_ASSERT(sp != NULL, ("null sp")); + printf("SP { refcnt=%u id=%u priority=%u state=%s policy=%s\n", + sp->refcnt, sp->id, sp->priority, + kdebug_secpolicy_state(sp->state), + kdebug_secpolicy_policy(sp->policy)); + kdebug_secpolicyindex(&sp->spidx, " "); + for (idx = 0; idx < sp->tcount; idx++) { + printf(" req[%u]{ level=%s ", idx, + kdebug_ipsecrequest_level(sp->req[idx]->level)); + kdebug_secasindex(&sp->req[idx]->saidx, NULL); + printf(" }\n"); + } + printf("}\n"); +} - printf("secas{"); - kdebug_secasindex(&sav->sah->saidx); +void +kdebug_secpolicyindex(struct secpolicyindex *spidx, const char *indent) +{ + char buf[IPSEC_ADDRSTRLEN]; + + IPSEC_ASSERT(spidx != NULL, ("null spidx")); + if (indent != NULL) + printf("%s", indent); + printf("spidx { dir=%s ul_proto=", + kdebug_secpolicyindex_dir(spidx->dir)); + if (spidx->ul_proto == IPSEC_ULPROTO_ANY) + printf("* "); + else + printf("%u ", spidx->ul_proto); + printf("%s/%u -> ", ipsec_address(&spidx->src, buf, sizeof(buf)), + spidx->prefs); + printf("%s/%u }\n", ipsec_address(&spidx->dst, buf, sizeof(buf)), + spidx->prefd); +} - printf(" refcnt=%u state=%u auth=%u enc=%u\n", - sav->refcnt, sav->state, sav->alg_auth, sav->alg_enc); - printf(" spi=%u flags=%u\n", - (u_int32_t)ntohl(sav->spi), sav->flags); +void +kdebug_secasindex(const struct secasindex *saidx, const char *indent) +{ + char buf[IPSEC_ADDRSTRLEN], port[6]; + + IPSEC_ASSERT(saidx != NULL, ("null saidx")); + if (indent != NULL) + printf("%s", indent); + printf("saidx { mode=%s proto=%u reqid=%u ", + kdebug_secasindex_mode(saidx->mode), saidx->proto, saidx->reqid); + printf("%s:%s -> ", ipsec_address(&saidx->src, buf, sizeof(buf)), + kdebug_port2str(&saidx->src.sa, port, sizeof(port))); + printf("%s:%s }\n", ipsec_address(&saidx->dst, buf, sizeof(buf)), + kdebug_port2str(&saidx->dst.sa, port, sizeof(port))); +} - if (sav->key_auth != NULL) - 
kdebug_sadb_key((struct sadb_ext *)sav->key_auth); - if (sav->key_enc != NULL) - kdebug_sadb_key((struct sadb_ext *)sav->key_enc); - if (sav->iv != NULL) { - printf(" iv="); - ipsec_hexdump(sav->iv, sav->ivlen ? sav->ivlen : 8); - printf("\n"); - } +static void +kdebug_sec_lifetime(struct seclifetime *lft, const char *indent) +{ - if (sav->replay != NULL) - kdebug_secreplay(sav->replay); - if (sav->lft_c != NULL) - kdebug_sec_lifetime(sav->lft_c); - if (sav->lft_h != NULL) - kdebug_sec_lifetime(sav->lft_h); - if (sav->lft_s != NULL) - kdebug_sec_lifetime(sav->lft_s); + IPSEC_ASSERT(lft != NULL, ("null lft")); + if (indent != NULL) + printf("%s", indent); + printf("lifetime { alloc=%u, bytes=%ju addtime=%ju usetime=%ju }\n", + lft->allocations, (uintmax_t)lft->bytes, (uintmax_t)lft->addtime, + (uintmax_t)lft->usetime); +} -#ifdef notyet - /* XXX: misc[123] ? */ -#endif +void +kdebug_secash(struct secashead *sah, const char *indent) +{ - return; + IPSEC_ASSERT(sah != NULL, ("null sah")); + if (indent != NULL) + printf("%s", indent); + printf("SAH { refcnt=%u state=%s\n", sah->refcnt, + kdebug_secasv_state(sah->state)); + if (indent != NULL) + printf("%s", indent); + kdebug_secasindex(&sah->saidx, indent); + if (indent != NULL) + printf("%s", indent); + printf("}\n"); } static void -kdebug_secreplay(rpl) - struct secreplay *rpl; +kdebug_secreplay(struct secreplay *rpl) { int len, l; - /* sanity check */ - if (rpl == NULL) - panic("%s: NULL pointer was passed.\n", __func__); - - printf(" secreplay{ count=%u wsize=%u seq=%u lastseq=%u", - rpl->count, rpl->wsize, rpl->seq, rpl->lastseq); + IPSEC_ASSERT(rpl != NULL, ("null rpl")); + printf(" secreplay{ count=%u bitmap_size=%u wsize=%u seq=%u lastseq=%u", + rpl->count, rpl->bitmap_size, rpl->wsize, rpl->seq, rpl->lastseq); if (rpl->bitmap == NULL) { - printf(" }\n"); + printf(" }\n"); return; } - printf("\n bitmap { "); - - for (len = 0; len < rpl->wsize; len++) { + printf("\n bitmap { "); + for (len = 0; len < 
rpl->bitmap_size*4; len++) { for (l = 7; l >= 0; l--) printf("%u", (((rpl->bitmap)[len] >> l) & 1) ? 1 : 0); } - printf(" }\n"); + printf(" }\n"); +} - return; +static void +kdebug_secnatt(struct secnatt *natt) +{ + char buf[IPSEC_ADDRSTRLEN]; + + IPSEC_ASSERT(natt != NULL, ("null natt")); + printf(" natt{ sport=%u dport=%u ", ntohs(natt->sport), + ntohs(natt->dport)); + if (natt->flags & IPSEC_NATT_F_OAI) + printf("oai=%s ", ipsec_address(&natt->oai, buf, sizeof(buf))); + if (natt->flags & IPSEC_NATT_F_OAR) + printf("oar=%s ", ipsec_address(&natt->oar, buf, sizeof(buf))); + printf("}\n"); +} + +void +kdebug_secasv(struct secasvar *sav) +{ + struct seclifetime lft_c; + + IPSEC_ASSERT(sav != NULL, ("null sav")); + + printf("SA { refcnt=%u spi=%u seq=%u pid=%u flags=0x%x state=%s\n", + sav->refcnt, ntohl(sav->spi), sav->seq, (uint32_t)sav->pid, + sav->flags, kdebug_secasv_state(sav->state)); + kdebug_secash(sav->sah, " "); + + lft_c.addtime = sav->created; + lft_c.allocations = (uint32_t)counter_u64_fetch( + sav->lft_c_allocations); + lft_c.bytes = counter_u64_fetch(sav->lft_c_bytes); + lft_c.usetime = sav->firstused; + kdebug_sec_lifetime(&lft_c, " c_"); + if (sav->lft_h != NULL) + kdebug_sec_lifetime(sav->lft_h, " h_"); + if (sav->lft_s != NULL) + kdebug_sec_lifetime(sav->lft_s, " s_"); + + if (sav->tdb_authalgxform != NULL) + printf(" alg_auth=%s\n", sav->tdb_authalgxform->name); + if (sav->key_auth != NULL) + KEYDBG(DUMP, + kdebug_sadb_key((struct sadb_ext *)sav->key_auth)); + if (sav->tdb_encalgxform != NULL) + printf(" alg_enc=%s\n", sav->tdb_encalgxform->name); + if (sav->key_enc != NULL) + KEYDBG(DUMP, + kdebug_sadb_key((struct sadb_ext *)sav->key_enc)); + if (sav->natt != NULL) + kdebug_secnatt(sav->natt); + if (sav->replay != NULL) { + KEYDBG(DUMP, + SECASVAR_LOCK(sav); + kdebug_secreplay(sav->replay); + SECASVAR_UNLOCK(sav)); + } + printf("}\n"); } void -kdebug_mbufhdr(m) - struct mbuf *m; +kdebug_mbufhdr(const struct mbuf *m) { /* sanity check */ if (m == 
NULL) @@ -665,19 +776,18 @@ kdebug_mbufhdr(m) if (m->m_flags & M_EXT) { printf(" m_ext{ ext_buf:%p ext_free:%p " - "ext_size:%u ref_cnt:%p }\n", + "ext_size:%u ext_cnt:%p }\n", m->m_ext.ext_buf, m->m_ext.ext_free, - m->m_ext.ext_size, m->m_ext.ref_cnt); + m->m_ext.ext_size, m->m_ext.ext_cnt); } return; } void -kdebug_mbuf(m0) - struct mbuf *m0; +kdebug_mbuf(const struct mbuf *m0) { - struct mbuf *m = m0; + const struct mbuf *m = m0; int i, j; for (j = 0; m; m = m->m_next) { @@ -688,7 +798,7 @@ kdebug_mbuf(m0) printf("\n"); if (i % 4 == 0) printf(" "); - printf("%02x", mtod(m, u_char *)[i]); + printf("%02x", mtod(m, const u_char *)[i]); j++; } printf("\n"); @@ -696,11 +806,51 @@ kdebug_mbuf(m0) return; } + +/* Return a printable string for the address. */ +char * +ipsec_address(const union sockaddr_union* sa, char *buf, socklen_t size) +{ + + switch (sa->sa.sa_family) { +#ifdef INET + case AF_INET: + return (inet_ntop(AF_INET, &sa->sin.sin_addr, buf, size)); +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + if (IN6_IS_SCOPE_LINKLOCAL(&sa->sin6.sin6_addr)) { + snprintf(buf, size, "%s%%%u", inet_ntop(AF_INET6, + &sa->sin6.sin6_addr, buf, size), + sa->sin6.sin6_scope_id); + return (buf); + } else + return (inet_ntop(AF_INET6, &sa->sin6.sin6_addr, + buf, size)); +#endif /* INET6 */ + case 0: + return ("*"); + default: + return ("(unknown address family)"); + } +} + +char * +ipsec_sa2str(struct secasvar *sav, char *buf, size_t size) +{ + char sbuf[IPSEC_ADDRSTRLEN], dbuf[IPSEC_ADDRSTRLEN]; + + snprintf(buf, size, "SA(SPI=%08lx src=%s dst=%s)", + (u_long)ntohl(sav->spi), + ipsec_address(&sav->sah->saidx.src, sbuf, sizeof(sbuf)), + ipsec_address(&sav->sah->saidx.dst, dbuf, sizeof(dbuf))); + return (buf); +} + #endif /* _KERNEL */ void -kdebug_sockaddr(addr) - struct sockaddr *addr; +kdebug_sockaddr(struct sockaddr *addr) { struct sockaddr_in *sin4; #ifdef INET6 @@ -738,9 +888,7 @@ kdebug_sockaddr(addr) } void -ipsec_bindump(buf, len) - caddr_t buf; - int len; 
+ipsec_bindump(caddr_t buf, int len) { int i; @@ -752,9 +900,7 @@ ipsec_bindump(buf, len) void -ipsec_hexdump(buf, len) - caddr_t buf; - int len; +ipsec_hexdump(caddr_t buf, int len) { int i; diff --git a/freebsd/sys/netipsec/key_debug.h b/freebsd/sys/netipsec/key_debug.h index 1a3782b1..18150b53 100644 --- a/freebsd/sys/netipsec/key_debug.h +++ b/freebsd/sys/netipsec/key_debug.h @@ -53,37 +53,50 @@ #define KEYDEBUG_IPSEC_DATA (KEYDEBUG_IPSEC | KEYDEBUG_DATA) #define KEYDEBUG_IPSEC_DUMP (KEYDEBUG_IPSEC | KEYDEBUG_DUMP) -#define KEYDEBUG(lev,arg) \ - do { if ((V_key_debug_level & (lev)) == (lev)) { arg; } } while (0) +#define KEYDBG(lev, arg) \ + if ((V_key_debug_level & (KEYDEBUG_ ## lev)) == (KEYDEBUG_ ## lev)) { \ + arg; \ + } -VNET_DECLARE(u_int32_t, key_debug_level); +VNET_DECLARE(uint32_t, key_debug_level); #define V_key_debug_level VNET(key_debug_level) #endif /*_KERNEL*/ struct sadb_msg; struct sadb_ext; -extern void kdebug_sadb __P((struct sadb_msg *)); -extern void kdebug_sadb_x_policy __P((struct sadb_ext *)); +extern void kdebug_sadb(struct sadb_msg *); +extern void kdebug_sadb_x_policy(struct sadb_ext *); #ifdef _KERNEL struct secpolicy; struct secpolicyindex; struct secasindex; +struct secashead; struct secasvar; struct secreplay; struct mbuf; -extern void kdebug_secpolicy __P((struct secpolicy *)); -extern void kdebug_secpolicyindex __P((struct secpolicyindex *)); -extern void kdebug_secasindex __P((struct secasindex *)); -extern void kdebug_secasv __P((struct secasvar *)); -extern void kdebug_mbufhdr __P((struct mbuf *)); -extern void kdebug_mbuf __P((struct mbuf *)); +union sockaddr_union; +const char* kdebug_secpolicy_state(u_int); +const char* kdebug_secpolicy_policy(u_int); +const char* kdebug_secpolicyindex_dir(u_int); +const char* kdebug_ipsecrequest_level(u_int); +const char* kdebug_secasindex_mode(u_int); +const char* kdebug_secasv_state(u_int); +void kdebug_secpolicy(struct secpolicy *); +void kdebug_secpolicyindex(struct secpolicyindex *, 
const char *); +void kdebug_secasindex(const struct secasindex *, const char *); +void kdebug_secash(struct secashead *, const char *); +void kdebug_secasv(struct secasvar *); +void kdebug_mbufhdr(const struct mbuf *); +void kdebug_mbuf(const struct mbuf *); +char *ipsec_address(const union sockaddr_union *, char *, socklen_t); +char *ipsec_sa2str(struct secasvar *, char *, size_t); #endif /*_KERNEL*/ struct sockaddr; -extern void kdebug_sockaddr __P((struct sockaddr *)); +extern void kdebug_sockaddr(struct sockaddr *); -extern void ipsec_hexdump __P((caddr_t, int)); -extern void ipsec_bindump __P((caddr_t, int)); +extern void ipsec_hexdump(caddr_t, int); +extern void ipsec_bindump(caddr_t, int); #endif /* _NETIPSEC_KEY_DEBUG_H_ */ diff --git a/freebsd/sys/netipsec/key_var.h b/freebsd/sys/netipsec/key_var.h index edf232d8..ecef2360 100644 --- a/freebsd/sys/netipsec/key_var.h +++ b/freebsd/sys/netipsec/key_var.h @@ -46,23 +46,6 @@ #define KEYCTL_ESP_AUTH 10 #define KEYCTL_AH_KEYMIN 11 #define KEYCTL_PREFERED_OLDSA 12 -#define KEYCTL_MAXID 13 - -#define KEYCTL_NAMES { \ - { 0, 0 }, \ - { "debug", CTLTYPE_INT }, \ - { "spi_try", CTLTYPE_INT }, \ - { "spi_min_value", CTLTYPE_INT }, \ - { "spi_max_value", CTLTYPE_INT }, \ - { "random_int", CTLTYPE_INT }, \ - { "larval_lifetime", CTLTYPE_INT }, \ - { "blockacq_count", CTLTYPE_INT }, \ - { "blockacq_lifetime", CTLTYPE_INT }, \ - { "esp_keymin", CTLTYPE_INT }, \ - { "esp_auth", CTLTYPE_INT }, \ - { "ah_keymin", CTLTYPE_INT }, \ - { "prefered_oldsa", CTLTYPE_INT }, \ -} #ifdef _KERNEL #define _ARRAYLEN(p) (sizeof(p)/sizeof(p[0])) diff --git a/freebsd/sys/netipsec/keydb.h b/freebsd/sys/netipsec/keydb.h index 7494f5f4..e3650146 100644 --- a/freebsd/sys/netipsec/keydb.h +++ b/freebsd/sys/netipsec/keydb.h @@ -34,6 +34,9 @@ #define _NETIPSEC_KEYDB_H_ #ifdef _KERNEL +#include +#include +#include #include @@ -54,9 +57,9 @@ union sockaddr_union { struct secasindex { union sockaddr_union src; /* source address for SA */ union 
sockaddr_union dst; /* destination address for SA */ - u_int16_t proto; /* IPPROTO_ESP or IPPROTO_AH */ - u_int8_t mode; /* mode of protocol, see ipsec.h */ - u_int32_t reqid; /* reqid id who owned this SA */ + uint8_t proto; /* IPPROTO_ESP or IPPROTO_AH */ + uint8_t mode; /* mode of protocol, see ipsec.h */ + uint32_t reqid; /* reqid id who owned this SA */ /* see IPSEC_MANUAL_REQID_MAX. */ }; @@ -85,15 +88,23 @@ struct seclifetime { u_int64_t usetime; }; -union sa_route_union { - struct route sa_route; - struct route sin_route; /* Duplicate for consistency. */ - struct route_in6 sin6_route; +struct secnatt { + union sockaddr_union oai; /* original addresses of initiator */ + union sockaddr_union oar; /* original address of responder */ + uint16_t sport; /* source port */ + uint16_t dport; /* destination port */ + uint16_t cksum; /* checksum delta */ + uint16_t flags; +#define IPSEC_NATT_F_OAI 0x0001 +#define IPSEC_NATT_F_OAR 0x0002 }; /* Security Association Data Base */ +TAILQ_HEAD(secasvar_queue, secasvar); struct secashead { - LIST_ENTRY(secashead) chain; + TAILQ_ENTRY(secashead) chain; + LIST_ENTRY(secashead) addrhash; /* hash by sproto+src+dst addresses */ + LIST_ENTRY(secashead) drainq; /* used ONLY by flush callout */ struct secasindex saidx; @@ -101,12 +112,10 @@ struct secashead { struct secident *identd; /* destination identity */ /* XXX I don't know how to use them. */ - u_int8_t state; /* MATURE or DEAD. */ - LIST_HEAD(_satree, secasvar) savtree[SADB_SASTATE_MAX+1]; - /* SA chain */ - /* The first of this list is newer SA */ - - union sa_route_union route_cache; + volatile u_int refcnt; /* reference count */ + uint8_t state; /* MATURE or DEAD. 
*/ + struct secasvar_queue savtree_alive; /* MATURE and DYING SA */ + struct secasvar_queue savtree_larval; /* LARVAL SA */ }; struct xformsw; @@ -114,72 +123,89 @@ struct enc_xform; struct auth_hash; struct comp_algo; -/* Security Association */ +/* + * Security Association + * + * For INBOUND packets we do SA lookup using SPI, thus only SPIHASH is used. + * For OUTBOUND packets there may be several SA suitable for packet. + * We use key_preferred_oldsa variable to choose better SA. First of we do + * lookup for suitable SAH using packet's saidx. Then we use SAH's savtree + * to search better candidate. The newer SA (by created time) are placed + * in the beginning of the savtree list. There is no preference between + * DYING and MATURE. + * + * NB: Fields with a tdb_ prefix are part of the "glue" used + * to interface to the OpenBSD crypto support. This was done + * to distinguish this code from the mainline KAME code. + * NB: Fields are sorted on the basis of the frequency of changes, i.e. + * constants and unchangeable fields are going first. + * NB: if you want to change this structure, check that this will not break + * key_updateaddresses(). 
+ */ struct secasvar { - LIST_ENTRY(secasvar) chain; - struct mtx lock; /* update/access lock */ - - u_int refcnt; /* reference count */ - u_int8_t state; /* Status of this Association */ - - u_int8_t alg_auth; /* Authentication Algorithm Identifier*/ - u_int8_t alg_enc; /* Cipher Algorithm Identifier */ - u_int8_t alg_comp; /* Compression Algorithm Identifier */ - u_int32_t spi; /* SPI Value, network byte order */ - u_int32_t flags; /* holder for SADB_KEY_FLAGS */ + uint32_t spi; /* SPI Value, network byte order */ + uint32_t flags; /* holder for SADB_KEY_FLAGS */ + uint32_t seq; /* sequence number */ + pid_t pid; /* message's pid */ + u_int ivlen; /* length of IV */ + struct secashead *sah; /* back pointer to the secashead */ struct seckey *key_auth; /* Key for Authentication */ struct seckey *key_enc; /* Key for Encryption */ - caddr_t iv; /* Initilization Vector */ - u_int ivlen; /* length of IV */ - void *sched; /* intermediate encryption key */ - size_t schedlen; - struct secreplay *replay; /* replay prevention */ - time_t created; /* for lifetime */ - - struct seclifetime *lft_c; /* CURRENT lifetime, it's constant. 
*/ + struct secnatt *natt; /* NAT-T config */ + struct mtx *lock; /* update/access lock */ + + const struct xformsw *tdb_xform; /* transform */ + const struct enc_xform *tdb_encalgxform;/* encoding algorithm */ + const struct auth_hash *tdb_authalgxform;/* authentication algorithm */ + const struct comp_algo *tdb_compalgxform;/* compression algorithm */ + uint64_t tdb_cryptoid; /* crypto session id */ + + uint8_t alg_auth; /* Authentication Algorithm Identifier*/ + uint8_t alg_enc; /* Cipher Algorithm Identifier */ + uint8_t alg_comp; /* Compression Algorithm Identifier */ + uint8_t state; /* Status of this SA (pfkeyv2.h) */ + + counter_u64_t lft_c; /* CURRENT lifetime */ +#define lft_c_allocations lft_c +#define lft_c_bytes lft_c + 1 struct seclifetime *lft_h; /* HARD lifetime */ struct seclifetime *lft_s; /* SOFT lifetime */ - u_int32_t seq; /* sequence number */ - pid_t pid; /* message's pid */ + uint64_t created; /* time when SA was created */ + uint64_t firstused; /* time when SA was first used */ - struct secashead *sah; /* back pointer to the secashead */ + TAILQ_ENTRY(secasvar) chain; + LIST_ENTRY(secasvar) spihash; + LIST_ENTRY(secasvar) drainq; /* used ONLY by flush callout */ - /* - * NB: Fields with a tdb_ prefix are part of the "glue" used - * to interface to the OpenBSD crypto support. This was done - * to distinguish this code from the mainline KAME code. - */ - struct xformsw *tdb_xform; /* transform */ - struct enc_xform *tdb_encalgxform; /* encoding algorithm */ - struct auth_hash *tdb_authalgxform; /* authentication algorithm */ - struct comp_algo *tdb_compalgxform; /* compression algorithm */ - u_int64_t tdb_cryptoid; /* crypto session id */ - - /* - * NAT-Traversal. - */ - u_int16_t natt_type; /* IKE/ESP-marker in output. */ - u_int16_t natt_esp_frag_len; /* MTU for payload fragmentation. 
*/ + uint64_t cntr; /* counter for GCM and CTR */ + volatile u_int refcnt; /* reference count */ }; -#define SECASVAR_LOCK_INIT(_sav) \ - mtx_init(&(_sav)->lock, "ipsec association", NULL, MTX_DEF) -#define SECASVAR_LOCK(_sav) mtx_lock(&(_sav)->lock) -#define SECASVAR_UNLOCK(_sav) mtx_unlock(&(_sav)->lock) -#define SECASVAR_LOCK_DESTROY(_sav) mtx_destroy(&(_sav)->lock) -#define SECASVAR_LOCK_ASSERT(_sav) mtx_assert(&(_sav)->lock, MA_OWNED) - -/* replay prevention */ +#define SECASVAR_LOCK(_sav) mtx_lock((_sav)->lock) +#define SECASVAR_UNLOCK(_sav) mtx_unlock((_sav)->lock) +#define SECASVAR_LOCK_ASSERT(_sav) mtx_assert((_sav)->lock, MA_OWNED) +#define SAV_ISGCM(_sav) \ + ((_sav)->alg_enc == SADB_X_EALG_AESGCM8 || \ + (_sav)->alg_enc == SADB_X_EALG_AESGCM12 || \ + (_sav)->alg_enc == SADB_X_EALG_AESGCM16) +#define SAV_ISCTR(_sav) ((_sav)->alg_enc == SADB_X_EALG_AESCTR) +#define SAV_ISCTRORGCM(_sav) (SAV_ISCTR((_sav)) || SAV_ISGCM((_sav))) + +/* Replay prevention, protected by SECASVAR_LOCK: + * (m) locked by mtx + * (c) read only except during creation / free + */ struct secreplay { - u_int32_t count; - u_int wsize; /* window size, i.g. 4 bytes */ - u_int32_t seq; /* used by sender */ - u_int32_t lastseq; /* used by receiver */ - caddr_t bitmap; /* used by receiver */ - int overflow; /* overflow flag */ + u_int32_t count; /* (m) */ + u_int wsize; /* (c) window size, i.g. 4 bytes */ + u_int32_t seq; /* (m) used by sender */ + u_int32_t lastseq; /* (m) used by receiver */ + u_int32_t *bitmap; /* (m) used by receiver */ + u_int bitmap_size; /* (c) size of the bitmap array */ + int overflow; /* (m) overflow flag */ }; /* socket table due to send PF_KEY messages. */ @@ -192,36 +218,15 @@ struct secreg { /* acquiring list table. 
*/ struct secacq { LIST_ENTRY(secacq) chain; + LIST_ENTRY(secacq) addrhash; + LIST_ENTRY(secacq) seqhash; struct secasindex saidx; - - u_int32_t seq; /* sequence number */ + uint32_t seq; /* sequence number */ time_t created; /* for lifetime */ int count; /* for lifetime */ }; -/* Sensitivity Level Specification */ -/* nothing */ - -#define SADB_KILL_INTERVAL 600 /* six seconds */ - -/* secpolicy */ -extern struct secpolicy *keydb_newsecpolicy __P((void)); -extern void keydb_delsecpolicy __P((struct secpolicy *)); -/* secashead */ -extern struct secashead *keydb_newsecashead __P((void)); -extern void keydb_delsecashead __P((struct secashead *)); -/* secasvar */ -extern struct secasvar *keydb_newsecasvar __P((void)); -extern void keydb_refsecasvar __P((struct secasvar *)); -extern void keydb_freesecasvar __P((struct secasvar *)); -/* secreplay */ -extern struct secreplay *keydb_newsecreplay __P((size_t)); -extern void keydb_delsecreplay __P((struct secreplay *)); -/* secreg */ -extern struct secreg *keydb_newsecreg __P((void)); -extern void keydb_delsecreg __P((struct secreg *)); - #endif /* _KERNEL */ #endif /* _NETIPSEC_KEYDB_H_ */ diff --git a/freebsd/sys/netipsec/keysock.c b/freebsd/sys/netipsec/keysock.c index 29e23bf3..21430347 100644 --- a/freebsd/sys/netipsec/keysock.c +++ b/freebsd/sys/netipsec/keysock.c @@ -54,9 +54,8 @@ #include #include -#include -#include #include +#include #include @@ -77,20 +76,25 @@ static VNET_DEFINE(struct key_cb, key_cb); static struct sockaddr key_src = { 2, PF_KEY, }; -static int key_sendup0 __P((struct rawcb *, struct mbuf *, int)); +static int key_sendup0(struct rawcb *, struct mbuf *, int); + +VNET_PCPUSTAT_DEFINE(struct pfkeystat, pfkeystat); +VNET_PCPUSTAT_SYSINIT(pfkeystat); -VNET_DEFINE(struct pfkeystat, pfkeystat); +#ifdef VIMAGE +VNET_PCPUSTAT_SYSUNINIT(pfkeystat); +#endif /* VIMAGE */ /* * key_output() */ int -key_output(struct mbuf *m, struct socket *so) +key_output(struct mbuf *m, struct socket *so, ...) 
{ struct sadb_msg *msg; int len, error = 0; - if (m == 0) + if (m == NULL) panic("%s: NULL pointer was passed.\n", __func__); PFKEYSTAT_INC(out_total); @@ -104,7 +108,7 @@ key_output(struct mbuf *m, struct socket *so) } if (m->m_len < sizeof(struct sadb_msg)) { - if ((m = m_pullup(m, sizeof(struct sadb_msg))) == 0) { + if ((m = m_pullup(m, sizeof(struct sadb_msg))) == NULL) { PFKEYSTAT_INC(out_nomem); error = ENOBUFS; goto end; @@ -113,7 +117,7 @@ key_output(struct mbuf *m, struct socket *so) M_ASSERTPKTHDR(m); - KEYDEBUG(KEYDEBUG_KEY_DUMP, kdebug_mbuf(m)); + KEYDBG(KEY_DUMP, kdebug_mbuf(m)); msg = mtod(m, struct sadb_msg *); PFKEYSTAT_INC(out_msgtype[msg->sadb_msg_type]); @@ -135,26 +139,18 @@ end: * send message to the socket. */ static int -key_sendup0(rp, m, promisc) - struct rawcb *rp; - struct mbuf *m; - int promisc; +key_sendup0(struct rawcb *rp, struct mbuf *m, int promisc) { int error; if (promisc) { struct sadb_msg *pmsg; - M_PREPEND(m, sizeof(struct sadb_msg), M_DONTWAIT); - if (m && m->m_len < sizeof(struct sadb_msg)) - m = m_pullup(m, sizeof(struct sadb_msg)); - if (!m) { + M_PREPEND(m, sizeof(struct sadb_msg), M_NOWAIT); + if (m == NULL) { PFKEYSTAT_INC(in_nomem); - m_freem(m); - return ENOBUFS; + return (ENOBUFS); } - m->m_pkthdr.len += sizeof(*pmsg); - pmsg = mtod(m, struct sadb_msg *); bzero(pmsg, sizeof(*pmsg)); pmsg->sadb_msg_version = PF_KEY_V2; @@ -178,22 +174,18 @@ key_sendup0(rp, m, promisc) /* XXX this interface should be obsoleted. 
*/ int -key_sendup(so, msg, len, target) - struct socket *so; - struct sadb_msg *msg; - u_int len; - int target; /*target of the resulting message*/ +key_sendup(struct socket *so, struct sadb_msg *msg, u_int len, int target) { struct mbuf *m, *n, *mprev; int tlen; /* sanity check */ - if (so == 0 || msg == 0) + if (so == NULL || msg == NULL) panic("%s: NULL pointer was passed.\n", __func__); - KEYDEBUG(KEYDEBUG_KEY_DUMP, - printf("%s: \n", __func__); - kdebug_sadb(msg)); + KEYDBG(KEY_DUMP, + printf("%s: \n", __func__); + kdebug_sadb(msg)); /* * we increment statistics here, just in case we have ENOBUFS @@ -216,14 +208,14 @@ key_sendup(so, msg, len, target) m = mprev = NULL; while (tlen > 0) { if (tlen == len) { - MGETHDR(n, M_DONTWAIT, MT_DATA); + MGETHDR(n, M_NOWAIT, MT_DATA); if (n == NULL) { PFKEYSTAT_INC(in_nomem); return ENOBUFS; } n->m_len = MHLEN; } else { - MGET(n, M_DONTWAIT, MT_DATA); + MGET(n, M_NOWAIT, MT_DATA); if (n == NULL) { PFKEYSTAT_INC(in_nomem); return ENOBUFS; @@ -231,8 +223,7 @@ key_sendup(so, msg, len, target) n->m_len = MLEN; } if (tlen >= MCLBYTES) { /*XXX better threshold? 
*/ - MCLGET(n, M_DONTWAIT); - if ((n->m_flags & M_EXT) == 0) { + if (!(MCLGET(n, M_NOWAIT))) { m_free(n); m_freem(m); PFKEYSTAT_INC(in_nomem); @@ -267,10 +258,7 @@ key_sendup(so, msg, len, target) /* so can be NULL if target != KEY_SENDUP_ONE */ int -key_sendup_mbuf(so, m, target) - struct socket *so; - struct mbuf *m; - int target; +key_sendup_mbuf(struct socket *so, struct mbuf *m, int target) { struct mbuf *n; struct keycb *kp; @@ -315,7 +303,7 @@ key_sendup_mbuf(so, m, target) * (based on pf_key@inner.net message on 14 Oct 1998) */ if (((struct keycb *)rp)->kp_promisc) { - if ((n = m_copy(m, 0, (int)M_COPYALL)) != NULL) { + if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) != NULL) { (void)key_sendup0(rp, n, 1); n = NULL; } @@ -345,7 +333,7 @@ key_sendup_mbuf(so, m, target) if (!sendup) continue; - if ((n = m_copy(m, 0, (int)M_COPYALL)) == NULL) { + if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) { m_freem(m); PFKEYSTAT_INC(in_nomem); mtx_unlock(&rawcb_mtx); @@ -402,7 +390,7 @@ key_attach(struct socket *so, int proto, struct thread *td) /* XXX */ kp = malloc(sizeof *kp, M_PCB, M_WAITOK | M_ZERO); - if (kp == 0) + if (kp == NULL) return ENOBUFS; so->so_pcb = (caddr_t)kp; @@ -578,7 +566,7 @@ struct domain keydomain = { .dom_destroy = key_destroy, #endif .dom_protosw = keysw, - .dom_protoswNPROTOSW = &keysw[sizeof(keysw)/sizeof(keysw[0])] + .dom_protoswNPROTOSW = &keysw[nitems(keysw)] }; VNET_DOMAIN_SET(key); diff --git a/freebsd/sys/netipsec/keysock.h b/freebsd/sys/netipsec/keysock.h index 6039dbba..8fbf4a02 100644 --- a/freebsd/sys/netipsec/keysock.h +++ b/freebsd/sys/netipsec/keysock.h @@ -36,26 +36,26 @@ /* statistics for pfkey socket */ struct pfkeystat { /* kernel -> userland */ - u_quad_t out_total; /* # of total calls */ - u_quad_t out_bytes; /* total bytecount */ - u_quad_t out_msgtype[256]; /* message type histogram */ - u_quad_t out_invlen; /* invalid length field */ - u_quad_t out_invver; /* invalid version field */ - u_quad_t out_invmsgtype; /* 
invalid message type field */ - u_quad_t out_tooshort; /* msg too short */ - u_quad_t out_nomem; /* memory allocation failure */ - u_quad_t out_dupext; /* duplicate extension */ - u_quad_t out_invexttype; /* invalid extension type */ - u_quad_t out_invsatype; /* invalid sa type */ - u_quad_t out_invaddr; /* invalid address extension */ + uint64_t out_total; /* # of total calls */ + uint64_t out_bytes; /* total bytecount */ + uint64_t out_msgtype[256]; /* message type histogram */ + uint64_t out_invlen; /* invalid length field */ + uint64_t out_invver; /* invalid version field */ + uint64_t out_invmsgtype; /* invalid message type field */ + uint64_t out_tooshort; /* msg too short */ + uint64_t out_nomem; /* memory allocation failure */ + uint64_t out_dupext; /* duplicate extension */ + uint64_t out_invexttype; /* invalid extension type */ + uint64_t out_invsatype; /* invalid sa type */ + uint64_t out_invaddr; /* invalid address extension */ /* userland -> kernel */ - u_quad_t in_total; /* # of total calls */ - u_quad_t in_bytes; /* total bytecount */ - u_quad_t in_msgtype[256]; /* message type histogram */ - u_quad_t in_msgtarget[3]; /* one/all/registered */ - u_quad_t in_nomem; /* memory allocation failure */ + uint64_t in_total; /* # of total calls */ + uint64_t in_bytes; /* total bytecount */ + uint64_t in_msgtype[256]; /* message type histogram */ + uint64_t in_msgtarget[3]; /* one/all/registered */ + uint64_t in_nomem; /* memory allocation failure */ /* others */ - u_quad_t sockerr; /* # of socket related errors */ + uint64_t sockerr; /* # of socket related errors */ }; #define KEY_SENDUP_ONE 0 @@ -63,23 +63,25 @@ struct pfkeystat { #define KEY_SENDUP_REGISTERED 2 #ifdef _KERNEL +#include + struct keycb { struct rawcb kp_raw; /* rawcb */ int kp_promisc; /* promiscuous mode */ int kp_registered; /* registered socket */ }; -VNET_DECLARE(struct pfkeystat, pfkeystat); -#define PFKEYSTAT_ADD(name, val) V_pfkeystat.name += (val) +VNET_PCPUSTAT_DECLARE(struct 
pfkeystat, pfkeystat); +#define PFKEYSTAT_ADD(name, val) \ + VNET_PCPUSTAT_ADD(struct pfkeystat, pfkeystat, name, (val)) #define PFKEYSTAT_INC(name) PFKEYSTAT_ADD(name, 1) -#define V_pfkeystat VNET(pfkeystat) -extern int key_output(struct mbuf *m, struct socket *so); -extern int key_usrreq __P((struct socket *, - int, struct mbuf *, struct mbuf *, struct mbuf *)); +extern int key_output(struct mbuf *m, struct socket *so, ...); +extern int key_usrreq(struct socket *, int, struct mbuf *, + struct mbuf *, struct mbuf *); -extern int key_sendup __P((struct socket *, struct sadb_msg *, u_int, int)); -extern int key_sendup_mbuf __P((struct socket *, struct mbuf *, int)); +extern int key_sendup(struct socket *, struct sadb_msg *, u_int, int); +extern int key_sendup_mbuf(struct socket *, struct mbuf *, int); #endif /* _KERNEL */ #endif /*_NETIPSEC_KEYSOCK_H_*/ diff --git a/freebsd/sys/netipsec/subr_ipsec.c b/freebsd/sys/netipsec/subr_ipsec.c new file mode 100644 index 00000000..ff830564 --- /dev/null +++ b/freebsd/sys/netipsec/subr_ipsec.c @@ -0,0 +1,356 @@ +#include + +/*- + * Copyright (c) 2016 Andrey V. Elsukov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +/* + * This file is build in the kernel only when 'options IPSEC' or + * 'options IPSEC_SUPPORT' is enabled. + */ + +#ifdef INET +void +ipsec4_setsockaddrs(const struct mbuf *m, union sockaddr_union *src, + union sockaddr_union *dst) +{ + static const struct sockaddr_in template = { + sizeof (struct sockaddr_in), + AF_INET, + 0, { 0 }, { 0, 0, 0, 0, 0, 0, 0, 0 } + }; + + src->sin = template; + dst->sin = template; + + if (m->m_len < sizeof (struct ip)) { + m_copydata(m, offsetof(struct ip, ip_src), + sizeof (struct in_addr), + (caddr_t) &src->sin.sin_addr); + m_copydata(m, offsetof(struct ip, ip_dst), + sizeof (struct in_addr), + (caddr_t) &dst->sin.sin_addr); + } else { + const struct ip *ip = mtod(m, const struct ip *); + src->sin.sin_addr = ip->ip_src; + dst->sin.sin_addr = ip->ip_dst; + } +} +#endif +#ifdef INET6 +void +ipsec6_setsockaddrs(const struct mbuf *m, union sockaddr_union *src, + union sockaddr_union *dst) +{ + struct ip6_hdr ip6buf; + const struct ip6_hdr *ip6; + + if (m->m_len >= sizeof(*ip6)) + ip6 = mtod(m, const struct ip6_hdr *); + else { + m_copydata(m, 0, sizeof(ip6buf), (caddr_t)&ip6buf); + ip6 = &ip6buf; + } + + bzero(&src->sin6, 
sizeof(struct sockaddr_in6)); + src->sin6.sin6_family = AF_INET6; + src->sin6.sin6_len = sizeof(struct sockaddr_in6); + bcopy(&ip6->ip6_src, &src->sin6.sin6_addr, sizeof(ip6->ip6_src)); + if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) { + src->sin6.sin6_addr.s6_addr16[1] = 0; + src->sin6.sin6_scope_id = ntohs(ip6->ip6_src.s6_addr16[1]); + } + + bzero(&dst->sin6, sizeof(struct sockaddr_in6)); + dst->sin6.sin6_family = AF_INET6; + dst->sin6.sin6_len = sizeof(struct sockaddr_in6); + bcopy(&ip6->ip6_dst, &dst->sin6.sin6_addr, sizeof(ip6->ip6_dst)); + if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst)) { + dst->sin6.sin6_addr.s6_addr16[1] = 0; + dst->sin6.sin6_scope_id = ntohs(ip6->ip6_dst.s6_addr16[1]); + } +} +#endif + +#ifdef IPSEC_SUPPORT +/* + * IPSEC_SUPPORT - loading of ipsec.ko and tcpmd5.ko is supported. + * IPSEC + IPSEC_SUPPORT - loading tcpmd5.ko is supported. + * IPSEC + TCP_SIGNATURE - all is build in the kernel, do not build + * IPSEC_SUPPORT. + */ +#if !defined(IPSEC) || !defined(TCP_SIGNATURE) +#define IPSEC_MODULE_INCR 2 +static int +ipsec_kmod_enter(volatile u_int *cntr) +{ + u_int old, new; + + do { + old = *cntr; + if ((old & IPSEC_MODULE_ENABLED) == 0) + return (ENXIO); + new = old + IPSEC_MODULE_INCR; + } while(atomic_cmpset_acq_int(cntr, old, new) == 0); + return (0); +} + +static void +ipsec_kmod_exit(volatile u_int *cntr) +{ + u_int old, new; + + do { + old = *cntr; + new = old - IPSEC_MODULE_INCR; + } while (atomic_cmpset_rel_int(cntr, old, new) == 0); +} + +static void +ipsec_kmod_drain(volatile u_int *cntr) +{ + u_int old, new; + + do { + old = *cntr; + new = old & ~IPSEC_MODULE_ENABLED; + } while (atomic_cmpset_acq_int(cntr, old, new) == 0); + while (atomic_cmpset_int(cntr, 0, 0) == 0) + pause("ipsecd", hz/2); +} + +#define METHOD_DECL(...) __VA_ARGS__ +#define METHOD_ARGS(...) 
__VA_ARGS__ +#define IPSEC_KMOD_METHOD(type, name, sc, method, decl, args) \ +type name (decl) \ +{ \ + type ret = (type)ipsec_kmod_enter(&sc->enabled); \ + if (ret == 0) { \ + ret = (*sc->methods->method)(args); \ + ipsec_kmod_exit(&sc->enabled); \ + } \ + return (ret); \ +} + +static int +ipsec_support_modevent(module_t mod, int type, void *data) +{ + + switch (type) { + case MOD_LOAD: + return (0); + case MOD_UNLOAD: + return (EBUSY); + default: + return (EOPNOTSUPP); + } +} + +static moduledata_t ipsec_support_mod = { + "ipsec_support", + ipsec_support_modevent, + 0 +}; +DECLARE_MODULE(ipsec_support, ipsec_support_mod, SI_SUB_PROTO_DOMAIN, + SI_ORDER_ANY); +MODULE_VERSION(ipsec_support, 1); +#endif /* !IPSEC || !TCP_SIGNATURE */ + +#ifndef TCP_SIGNATURE +/* Declare TCP-MD5 support as kernel module. */ +static struct tcpmd5_support tcpmd5_ipsec = { + .enabled = 0, + .methods = NULL +}; +struct tcpmd5_support * const tcp_ipsec_support = &tcpmd5_ipsec; + +IPSEC_KMOD_METHOD(int, tcpmd5_kmod_input, sc, + input, METHOD_DECL(struct tcpmd5_support * const sc, struct mbuf *m, + struct tcphdr *th, u_char *buf), METHOD_ARGS(m, th, buf) +) + +IPSEC_KMOD_METHOD(int, tcpmd5_kmod_output, sc, + output, METHOD_DECL(struct tcpmd5_support * const sc, struct mbuf *m, + struct tcphdr *th, u_char *buf), METHOD_ARGS(m, th, buf) +) + +IPSEC_KMOD_METHOD(int, tcpmd5_kmod_pcbctl, sc, + pcbctl, METHOD_DECL(struct tcpmd5_support * const sc, struct inpcb *inp, + struct sockopt *sopt), METHOD_ARGS(inp, sopt) +) + +void +tcpmd5_support_enable(const struct tcpmd5_methods * const methods) +{ + + KASSERT(tcp_ipsec_support->enabled == 0, ("TCP-MD5 already enabled")); + tcp_ipsec_support->methods = methods; + tcp_ipsec_support->enabled |= IPSEC_MODULE_ENABLED; +} + +void +tcpmd5_support_disable(void) +{ + + if (tcp_ipsec_support->enabled & IPSEC_MODULE_ENABLED) { + ipsec_kmod_drain(&tcp_ipsec_support->enabled); + tcp_ipsec_support->methods = NULL; + } +} +#endif /* !TCP_SIGNATURE */ + +#ifndef 
IPSEC +/* + * IPsec support is build as kernel module. + */ +#ifdef INET +static struct ipsec_support ipv4_ipsec = { + .enabled = 0, + .methods = NULL +}; +struct ipsec_support * const ipv4_ipsec_support = &ipv4_ipsec; + +IPSEC_KMOD_METHOD(int, ipsec_kmod_udp_input, sc, + udp_input, METHOD_DECL(struct ipsec_support * const sc, struct mbuf *m, + int off, int af), METHOD_ARGS(m, off, af) +) + +IPSEC_KMOD_METHOD(int, ipsec_kmod_udp_pcbctl, sc, + udp_pcbctl, METHOD_DECL(struct ipsec_support * const sc, struct inpcb *inp, + struct sockopt *sopt), METHOD_ARGS(inp, sopt) +) +#endif + +#ifdef INET6 +static struct ipsec_support ipv6_ipsec = { + .enabled = 0, + .methods = NULL +}; +struct ipsec_support * const ipv6_ipsec_support = &ipv6_ipsec; +#endif + +IPSEC_KMOD_METHOD(int, ipsec_kmod_input, sc, + input, METHOD_DECL(struct ipsec_support * const sc, struct mbuf *m, + int offset, int proto), METHOD_ARGS(m, offset, proto) +) + +IPSEC_KMOD_METHOD(int, ipsec_kmod_check_policy, sc, + check_policy, METHOD_DECL(struct ipsec_support * const sc, struct mbuf *m, + struct inpcb *inp), METHOD_ARGS(m, inp) +) + +IPSEC_KMOD_METHOD(int, ipsec_kmod_forward, sc, + forward, METHOD_DECL(struct ipsec_support * const sc, struct mbuf *m), + (m) +) + +IPSEC_KMOD_METHOD(int, ipsec_kmod_output, sc, + output, METHOD_DECL(struct ipsec_support * const sc, struct mbuf *m, + struct inpcb *inp), METHOD_ARGS(m, inp) +) + +IPSEC_KMOD_METHOD(int, ipsec_kmod_pcbctl, sc, + pcbctl, METHOD_DECL(struct ipsec_support * const sc, struct inpcb *inp, + struct sockopt *sopt), METHOD_ARGS(inp, sopt) +) + +IPSEC_KMOD_METHOD(size_t, ipsec_kmod_hdrsize, sc, + hdrsize, METHOD_DECL(struct ipsec_support * const sc, struct inpcb *inp), + (inp) +) + +static IPSEC_KMOD_METHOD(int, ipsec_kmod_caps, sc, + capability, METHOD_DECL(struct ipsec_support * const sc, struct mbuf *m, + u_int cap), METHOD_ARGS(m, cap) +) + +int +ipsec_kmod_capability(struct ipsec_support * const sc, struct mbuf *m, + u_int cap) +{ + + /* + * Since 
PF_KEY is build in the kernel, we can directly + * call key_havesp() without additional synchronizations. + */ + if (cap == IPSEC_CAP_OPERABLE) + return (key_havesp(IPSEC_DIR_INBOUND) != 0 || + key_havesp(IPSEC_DIR_OUTBOUND) != 0); + return (ipsec_kmod_caps(sc, m, cap)); +} + +void +ipsec_support_enable(struct ipsec_support * const sc, + const struct ipsec_methods * const methods) +{ + + KASSERT(sc->enabled == 0, ("IPsec already enabled")); + sc->methods = methods; + sc->enabled |= IPSEC_MODULE_ENABLED; +} + +void +ipsec_support_disable(struct ipsec_support * const sc) +{ + + if (sc->enabled & IPSEC_MODULE_ENABLED) { + ipsec_kmod_drain(&sc->enabled); + sc->methods = NULL; + } +} +#endif /* !IPSEC */ +#endif /* IPSEC_SUPPORT */ diff --git a/freebsd/sys/netipsec/udpencap.c b/freebsd/sys/netipsec/udpencap.c new file mode 100644 index 00000000..fd2ca444 --- /dev/null +++ b/freebsd/sys/netipsec/udpencap.c @@ -0,0 +1,299 @@ +#include + +/*- + * Copyright (c) 2016 Andrey V. Elsukov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +#include +#include +#include +#include + +/* + * Handle UDP_ENCAP socket option. Always return with released INP_WLOCK. + */ +int +udp_ipsec_pcbctl(struct inpcb *inp, struct sockopt *sopt) +{ + struct udpcb *up; + int error, optval; + + INP_WLOCK_ASSERT(inp); + if (sopt->sopt_name != UDP_ENCAP) { + INP_WUNLOCK(inp); + return (ENOPROTOOPT); + } + + up = intoudpcb(inp); + if (sopt->sopt_dir == SOPT_GET) { + if (up->u_flags & UF_ESPINUDP) + optval = UDP_ENCAP_ESPINUDP; + else + optval = 0; + INP_WUNLOCK(inp); + return (sooptcopyout(sopt, &optval, sizeof(optval))); + } + INP_WUNLOCK(inp); + + error = sooptcopyin(sopt, &optval, sizeof(optval), sizeof(optval)); + if (error != 0) + return (error); + + INP_WLOCK(inp); + switch (optval) { + case 0: + up->u_flags &= ~UF_ESPINUDP; + break; + case UDP_ENCAP_ESPINUDP: + up->u_flags |= UF_ESPINUDP; + break; + default: + error = EINVAL; + } + INP_WUNLOCK(inp); + return (error); +} + +/* + * Potentially decap ESP in UDP frame. Check for an ESP header. + * If present, strip the UDP header and push the result through IPSec. + * + * Returns error if mbuf consumed and/or processed, otherwise 0. 
+ */ +int +udp_ipsec_input(struct mbuf *m, int off, int af) +{ + union sockaddr_union dst; + struct secasvar *sav; + struct udphdr *udp; + struct ip *ip; + uint32_t spi; + int error, hlen; + + /* + * Just return if packet doesn't have enough data. + * We need at least [IP header + UDP header + ESP header]. + * NAT-Keepalive packet has only one byte of payload, so it + * by default will not be processed. + */ + if (m->m_pkthdr.len < off + sizeof(struct esp)) + return (0); + + m_copydata(m, off, sizeof(uint32_t), (caddr_t)&spi); + if (spi == 0) /* Non-ESP marker. */ + return (0); + + /* + * Find SA and check that it is configured for UDP + * encapsulation. + */ + bzero(&dst, sizeof(dst)); + dst.sa.sa_family = af; + switch (af) { +#ifdef INET + case AF_INET: + dst.sin.sin_len = sizeof(struct sockaddr_in); + ip = mtod(m, struct ip *); + ip->ip_p = IPPROTO_ESP; + off = offsetof(struct ip, ip_p); + hlen = ip->ip_hl << 2; + dst.sin.sin_addr = ip->ip_dst; + break; +#endif +#ifdef INET6 + case AF_INET6: + /* Not yet */ + /* FALLTHROUGH */ +#endif + default: + ESPSTAT_INC(esps_nopf); + m_freem(m); + return (EPFNOSUPPORT); + } + + sav = key_allocsa(&dst, IPPROTO_ESP, spi); + if (sav == NULL) { + ESPSTAT_INC(esps_notdb); + m_freem(m); + return (ENOENT); + } + udp = mtodo(m, hlen); + if (sav->natt == NULL || + sav->natt->sport != udp->uh_sport || + sav->natt->dport != udp->uh_dport) { + /* XXXAE: should we check source address? */ + ESPSTAT_INC(esps_notdb); + key_freesav(&sav); + m_freem(m); + return (ENOENT); + } + /* + * Remove the UDP header + * Before: + * <--- off ---> + * +----+------+-----+ + * | IP | UDP | ESP | + * +----+------+-----+ + * <-skip-> + * After: + * +----+-----+ + * | IP | ESP | + * +----+-----+ + * <-skip-> + */ + m_striphdr(m, hlen, sizeof(*udp)); + /* + * We cannot yet update the cksums so clear any h/w cksum flags + * as they are no longer valid. 
+ */ + if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) + m->m_pkthdr.csum_flags &= ~(CSUM_DATA_VALID | CSUM_PSEUDO_HDR); + /* + * We can update ip_len and ip_sum here, but ipsec4_input_cb() + * will do this anyway, so don't touch them here. + */ + ESPSTAT_INC(esps_input); + error = (*sav->tdb_xform->xf_input)(m, sav, hlen, off); + if (error != 0) + key_freesav(&sav); + + return (EINPROGRESS); /* Consumed by IPsec. */ +} + +int +udp_ipsec_output(struct mbuf *m, struct secasvar *sav) +{ + struct udphdr *udp; + struct mbuf *n; + struct ip *ip; + int hlen, off; + + IPSEC_ASSERT(sav->natt != NULL, ("UDP encapsulation isn't required.")); + + if (sav->sah->saidx.dst.sa.sa_family == AF_INET6) + return (EAFNOSUPPORT); + + ip = mtod(m, struct ip *); + hlen = ip->ip_hl << 2; + n = m_makespace(m, hlen, sizeof(*udp), &off); + if (n == NULL) { + DPRINTF(("%s: m_makespace for udphdr failed\n", __func__)); + return (ENOBUFS); + } + + udp = mtodo(n, off); + udp->uh_dport = sav->natt->dport; + udp->uh_sport = sav->natt->sport; + udp->uh_sum = 0; + udp->uh_ulen = htons(m->m_pkthdr.len - hlen); + + ip = mtod(m, struct ip *); + ip->ip_len = htons(m->m_pkthdr.len); + ip->ip_p = IPPROTO_UDP; + return (0); +} + +void +udp_ipsec_adjust_cksum(struct mbuf *m, struct secasvar *sav, int proto, + int skip) +{ + struct ip *ip; + uint16_t cksum, off; + + IPSEC_ASSERT(sav->natt != NULL, ("NAT-T isn't required")); + IPSEC_ASSERT(proto == IPPROTO_UDP || proto == IPPROTO_TCP, + ("unexpected protocol %u", proto)); + + if (proto == IPPROTO_UDP) + off = offsetof(struct udphdr, uh_sum); + else + off = offsetof(struct tcphdr, th_sum); + + if (V_natt_cksum_policy == 0) { /* auto */ + if (sav->natt->cksum != 0) { + /* Incrementally recompute. */ + m_copydata(m, skip + off, sizeof(cksum), + (caddr_t)&cksum); + /* Do not adjust UDP checksum if it is zero. */ + if (proto == IPPROTO_UDP && cksum == 0) + return; + cksum = in_addword(cksum, sav->natt->cksum); + } else { + /* No OA from IKEd. 
*/ + if (proto == IPPROTO_TCP) { + /* Ignore for TCP. */ + m->m_pkthdr.csum_data = 0xffff; + m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | + CSUM_PSEUDO_HDR); + return; + } + cksum = 0; /* Reset for UDP. */ + } + m_copyback(m, skip + off, sizeof(cksum), (caddr_t)&cksum); + } else { /* Fully recompute */ + ip = mtod(m, struct ip *); + cksum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, + htons(m->m_pkthdr.len - skip + proto)); + m_copyback(m, skip + off, sizeof(cksum), (caddr_t)&cksum); + m->m_pkthdr.csum_flags = + (proto == IPPROTO_UDP) ? CSUM_UDP: CSUM_TCP; + m->m_pkthdr.csum_data = off; + in_delayed_cksum(m); + m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; + } +} + diff --git a/freebsd/sys/netipsec/xform.h b/freebsd/sys/netipsec/xform.h index e389cab3..8e6f8bdb 100644 --- a/freebsd/sys/netipsec/xform.h +++ b/freebsd/sys/netipsec/xform.h @@ -42,6 +42,7 @@ #define _NETIPSEC_XFORM_H_ #include +#include #include #include @@ -49,83 +50,68 @@ #define AH_HMAC_MAXHASHLEN (SHA2_512_HASH_LEN/2) /* Keep this updated */ #define AH_HMAC_INITIAL_RPL 1 /* replay counter initial value */ +#ifdef _KERNEL +struct secpolicy; +struct secasvar; + /* * Packet tag assigned on completion of IPsec processing; used - * to speedup processing when/if the packet comes back for more - * processing. + * to speedup security policy checking for INBOUND packets. */ -struct tdb_ident { - u_int32_t spi; - union sockaddr_union dst; - u_int8_t proto; - /* Cache those two for enc(4) in xform_ipip. */ - u_int8_t alg_auth; - u_int8_t alg_enc; +struct xform_history { + union sockaddr_union dst; /* destination address */ + uint32_t spi; /* Security Parameters Index */ + uint8_t proto; /* IPPROTO_ESP or IPPROTO_AH */ + uint8_t mode; /* transport or tunnel */ }; /* * Opaque data structure hung off a crypto operation descriptor. 
*/ -struct tdb_crypto { - struct ipsecrequest *tc_isr; /* ipsec request state */ - u_int32_t tc_spi; /* associated SPI */ - union sockaddr_union tc_dst; /* dst addr of packet */ - u_int8_t tc_proto; /* current protocol, e.g. AH */ - u_int8_t tc_nxt; /* next protocol, e.g. IPV4 */ - int tc_protoff; /* current protocol offset */ - int tc_skip; /* data offset */ - caddr_t tc_ptr; /* associated crypto data */ - struct secasvar *tc_sav; /* related SA */ +struct xform_data { + struct secpolicy *sp; /* security policy */ + struct secasvar *sav; /* related SA */ + uint64_t cryptoid; /* used crypto session id */ + u_int idx; /* IPsec request index */ + int protoff; /* current protocol offset */ + int skip; /* data offset */ + uint8_t nxt; /* next protocol, e.g. IPV4 */ }; -struct secasvar; -struct ipescrequest; - -struct xformsw { - u_short xf_type; /* xform ID */ -#define XF_IP4 1 /* IP inside IP */ +#define XF_IP4 1 /* unused */ #define XF_AH 2 /* AH */ #define XF_ESP 3 /* ESP */ #define XF_TCPSIGNATURE 5 /* TCP MD5 Signature option, RFC 2358 */ #define XF_IPCOMP 6 /* IPCOMP */ - u_short xf_flags; -#define XFT_AUTH 0x0001 -#define XFT_CONF 0x0100 -#define XFT_COMP 0x1000 - char *xf_name; /* human-readable name */ + +struct xformsw { + u_short xf_type; /* xform ID */ + char *xf_name; /* human-readable name */ int (*xf_init)(struct secasvar*, struct xformsw*); /* setup */ int (*xf_zeroize)(struct secasvar*); /* cleanup */ int (*xf_input)(struct mbuf*, struct secasvar*, /* input */ int, int); - int (*xf_output)(struct mbuf*, /* output */ - struct ipsecrequest *, struct mbuf **, int, int); - struct xformsw *xf_next; /* list of registered xforms */ + int (*xf_output)(struct mbuf*, /* output */ + struct secpolicy *, struct secasvar *, u_int, int, int); + LIST_ENTRY(xformsw) chain; }; -#ifdef _KERNEL -extern void xform_register(struct xformsw*); -extern int xform_init(struct secasvar *sav, int xftype); +const struct enc_xform * enc_algorithm_lookup(int); +const struct auth_hash 
* auth_algorithm_lookup(int); +const struct comp_algo * comp_algorithm_lookup(int); -struct cryptoini; - -/* XF_IP4 */ -extern int ip4_input6(struct mbuf **m, int *offp, int proto); -extern void ip4_input(struct mbuf *m, int); -extern int ipip_output(struct mbuf *, struct ipsecrequest *, - struct mbuf **, int, int); +void xform_attach(void *); +void xform_detach(void *); +struct cryptoini; /* XF_AH */ +int xform_ah_authsize(const struct auth_hash *); extern int ah_init0(struct secasvar *, struct xformsw *, struct cryptoini *); extern int ah_zeroize(struct secasvar *sav); -extern struct auth_hash *ah_algorithm_lookup(int alg); extern size_t ah_hdrsiz(struct secasvar *); /* XF_ESP */ -extern struct enc_xform *esp_algorithm_lookup(int alg); extern size_t esp_hdrsiz(struct secasvar *sav); -/* XF_COMP */ -extern struct comp_algo *ipcomp_algorithm_lookup(int alg); - #endif /* _KERNEL */ #endif /* _NETIPSEC_XFORM_H_ */ diff --git a/freebsd/sys/netipsec/xform_ah.c b/freebsd/sys/netipsec/xform_ah.c index f1304c24..5dd41282 100644 --- a/freebsd/sys/netipsec/xform_ah.c +++ b/freebsd/sys/netipsec/xform_ah.c @@ -47,6 +47,8 @@ #include #include #include +#include +#include #include #include @@ -58,7 +60,6 @@ #include #include -#include #include #include #include @@ -83,24 +84,29 @@ (((sav)->flags & SADB_X_EXT_OLD) ? \ sizeof (struct ah) : sizeof (struct ah) + sizeof (u_int32_t)) /* - * Return authenticator size in bytes. The old protocol is known - * to use a fixed 16-byte authenticator. The new algorithm use 12-byte - * authenticator. + * Return authenticator size in bytes, based on a field in the + * algorithm descriptor. */ -#define AUTHSIZE(sav) ah_authsize(sav) +#define AUTHSIZE(sav) ((sav->flags & SADB_X_EXT_OLD) ? 
16 : \ + xform_ah_authsize((sav)->tdb_authalgxform)) VNET_DEFINE(int, ah_enable) = 1; /* control flow of packets with AH */ VNET_DEFINE(int, ah_cleartos) = 1; /* clear ip_tos when doing AH calc */ -VNET_DEFINE(struct ahstat, ahstat); +VNET_PCPUSTAT_DEFINE(struct ahstat, ahstat); +VNET_PCPUSTAT_SYSINIT(ahstat); + +#ifdef VIMAGE +VNET_PCPUSTAT_SYSUNINIT(ahstat); +#endif /* VIMAGE */ #ifdef INET SYSCTL_DECL(_net_inet_ah); -SYSCTL_VNET_INT(_net_inet_ah, OID_AUTO, - ah_enable, CTLFLAG_RW, &VNET_NAME(ah_enable), 0, ""); -SYSCTL_VNET_INT(_net_inet_ah, OID_AUTO, - ah_cleartos, CTLFLAG_RW, &VNET_NAME(ah_cleartos), 0, ""); -SYSCTL_VNET_STRUCT(_net_inet_ah, IPSECCTL_STATS, - stats, CTLFLAG_RD, &VNET_NAME(ahstat), ahstat, ""); +SYSCTL_INT(_net_inet_ah, OID_AUTO, ah_enable, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ah_enable), 0, ""); +SYSCTL_INT(_net_inet_ah, OID_AUTO, ah_cleartos, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ah_cleartos), 0, ""); +SYSCTL_VNET_PCPUSTAT(_net_inet_ah, IPSECCTL_STATS, stats, struct ahstat, + ahstat, "AH statistics (struct ahstat, netipsec/ah_var.h)"); #endif static unsigned char ipseczeroes[256]; /* larger than an ip6 extension hdr */ @@ -108,56 +114,33 @@ static unsigned char ipseczeroes[256]; /* larger than an ip6 extension hdr */ static int ah_input_cb(struct cryptop*); static int ah_output_cb(struct cryptop*); -static int -ah_authsize(struct secasvar *sav) +int +xform_ah_authsize(const struct auth_hash *esph) { + int alen; - IPSEC_ASSERT(sav != NULL, ("%s: sav == NULL", __func__)); + if (esph == NULL) + return 0; - if (sav->flags & SADB_X_EXT_OLD) - return 16; + switch (esph->type) { + case CRYPTO_SHA2_256_HMAC: + case CRYPTO_SHA2_384_HMAC: + case CRYPTO_SHA2_512_HMAC: + alen = esph->hashsize / 2; /* RFC4868 2.3 */ + break; + + case CRYPTO_AES_128_NIST_GMAC: + case CRYPTO_AES_192_NIST_GMAC: + case CRYPTO_AES_256_NIST_GMAC: + alen = esph->hashsize; + break; - switch (sav->alg_auth) { - case SADB_X_AALG_SHA2_256: - return 16; - case 
SADB_X_AALG_SHA2_384: - return 24; - case SADB_X_AALG_SHA2_512: - return 32; default: - return AH_HMAC_HASHLEN; - } - /* NOTREACHED */ -} -/* - * NB: this is public for use by the PF_KEY support. - */ -struct auth_hash * -ah_algorithm_lookup(int alg) -{ - if (alg > SADB_AALG_MAX) - return NULL; - switch (alg) { - case SADB_X_AALG_NULL: - return &auth_hash_null; - case SADB_AALG_MD5HMAC: - return &auth_hash_hmac_md5; - case SADB_AALG_SHA1HMAC: - return &auth_hash_hmac_sha1; - case SADB_X_AALG_RIPEMD160HMAC: - return &auth_hash_hmac_ripemd_160; - case SADB_X_AALG_MD5: - return &auth_hash_key_md5; - case SADB_X_AALG_SHA: - return &auth_hash_key_sha1; - case SADB_X_AALG_SHA2_256: - return &auth_hash_hmac_sha2_256; - case SADB_X_AALG_SHA2_384: - return &auth_hash_hmac_sha2_384; - case SADB_X_AALG_SHA2_512: - return &auth_hash_hmac_sha2_512; + alen = AH_HMAC_HASHLEN; + break; } - return NULL; + + return alen; } size_t @@ -184,10 +167,10 @@ ah_hdrsiz(struct secasvar *sav) int ah_init0(struct secasvar *sav, struct xformsw *xsp, struct cryptoini *cria) { - struct auth_hash *thash; + const struct auth_hash *thash; int keylen; - thash = ah_algorithm_lookup(sav->alg_auth); + thash = auth_algorithm_lookup(sav->alg_auth); if (thash == NULL) { DPRINTF(("%s: unsupported authentication algorithm %u\n", __func__, sav->alg_auth)); @@ -307,23 +290,10 @@ ah_massage_headers(struct mbuf **m0, int proto, int skip, int alg, int out) ip->ip_ttl = 0; ip->ip_sum = 0; - /* - * On input, fix ip_len which has been byte-swapped - * at ip_input(). 
- */ - if (!out) { - ip->ip_len = htons(ip->ip_len + skip); - - if (alg == CRYPTO_MD5_KPDK || alg == CRYPTO_SHA1_KPDK) - ip->ip_off = htons(ip->ip_off & IP_DF); - else - ip->ip_off = 0; - } else { - if (alg == CRYPTO_MD5_KPDK || alg == CRYPTO_SHA1_KPDK) - ip->ip_off = htons(ntohs(ip->ip_off) & IP_DF); - else - ip->ip_off = 0; - } + if (alg == CRYPTO_MD5_KPDK || alg == CRYPTO_SHA1_KPDK) + ip->ip_off &= htons(IP_DF); + else + ip->ip_off = htons(0); ptr = mtod(m, unsigned char *) + sizeof(struct ip); @@ -576,15 +546,14 @@ ah_massage_headers(struct mbuf **m0, int proto, int skip, int alg, int out) static int ah_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff) { - struct auth_hash *ahx; - struct tdb_ident *tdbi; - struct tdb_crypto *tc; - struct m_tag *mtag; - struct newah *ah; - int hl, rplen, authsize; - + char buf[128]; + const struct auth_hash *ahx; struct cryptodesc *crda; struct cryptop *crp; + struct xform_data *xd; + struct newah *ah; + uint64_t cryptoid; + int hl, rplen, authsize, error; IPSEC_ASSERT(sav != NULL, ("null SA")); IPSEC_ASSERT(sav->key_auth != NULL, ("null authentication key")); @@ -604,13 +573,18 @@ ah_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff) } /* Check replay window, if applicable. */ - if (sav->replay && !ipsec_chkreplay(ntohl(ah->ah_seq), sav)) { + SECASVAR_LOCK(sav); + if (sav->replay != NULL && sav->replay->wsize != 0 && + ipsec_chkreplay(ntohl(ah->ah_seq), sav) == 0) { + SECASVAR_UNLOCK(sav); AHSTAT_INC(ahs_replay); DPRINTF(("%s: packet replay failure: %s\n", __func__, - ipsec_logsastr(sav))); + ipsec_sa2str(sav, buf, sizeof(buf)))); m_freem(m); - return ENOBUFS; + return (EACCES); } + cryptoid = sav->tdb_cryptoid; + SECASVAR_UNLOCK(sav); /* Verify AH header length. 
*/ hl = ah->ah_len * sizeof (u_int32_t); @@ -618,10 +592,10 @@ ah_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff) authsize = AUTHSIZE(sav); if (hl != authsize + rplen - sizeof (struct ah)) { DPRINTF(("%s: bad authenticator length %u (expecting %lu)" - " for packet in SA %s/%08lx\n", __func__, - hl, (u_long) (authsize + rplen - sizeof (struct ah)), - ipsec_address(&sav->sah->saidx.dst), - (u_long) ntohl(sav->spi))); + " for packet in SA %s/%08lx\n", __func__, hl, + (u_long) (authsize + rplen - sizeof (struct ah)), + ipsec_address(&sav->sah->saidx.dst, buf, sizeof(buf)), + (u_long) ntohl(sav->spi))); AHSTAT_INC(ahs_badauthl); m_freem(m); return EACCES; @@ -631,7 +605,8 @@ ah_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff) /* Get crypto descriptors. */ crp = crypto_getreq(1); if (crp == NULL) { - DPRINTF(("%s: failed to acquire crypto descriptor\n",__func__)); + DPRINTF(("%s: failed to acquire crypto descriptor\n", + __func__)); AHSTAT_INC(ahs_crypto); m_freem(m); return ENOBUFS; @@ -649,58 +624,35 @@ ah_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff) crda->crd_klen = _KEYBITS(sav->key_auth); crda->crd_key = sav->key_auth->key_data; - /* Find out if we've already done crypto. */ - for (mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_CRYPTO_DONE, NULL); - mtag != NULL; - mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_CRYPTO_DONE, mtag)) { - tdbi = (struct tdb_ident *) (mtag + 1); - if (tdbi->proto == sav->sah->saidx.proto && - tdbi->spi == sav->spi && - !bcmp(&tdbi->dst, &sav->sah->saidx.dst, - sizeof (union sockaddr_union))) - break; - } - /* Allocate IPsec-specific opaque crypto info. */ - if (mtag == NULL) { - tc = (struct tdb_crypto *) malloc(sizeof (struct tdb_crypto) + - skip + rplen + authsize, M_XDATA, M_NOWAIT|M_ZERO); - } else { - /* Hash verification has already been done successfully. 
*/ - tc = (struct tdb_crypto *) malloc(sizeof (struct tdb_crypto), - M_XDATA, M_NOWAIT|M_ZERO); - } - if (tc == NULL) { - DPRINTF(("%s: failed to allocate tdb_crypto\n", __func__)); + xd = malloc(sizeof(*xd) + skip + rplen + authsize, M_XDATA, + M_NOWAIT | M_ZERO); + if (xd == NULL) { + DPRINTF(("%s: failed to allocate xform_data\n", __func__)); AHSTAT_INC(ahs_crypto); crypto_freereq(crp); m_freem(m); return ENOBUFS; } - /* Only save information if crypto processing is needed. */ - if (mtag == NULL) { - int error; + /* + * Save the authenticator, the skipped portion of the packet, + * and the AH header. + */ + m_copydata(m, 0, skip + rplen + authsize, (caddr_t)(xd + 1)); - /* - * Save the authenticator, the skipped portion of the packet, - * and the AH header. - */ - m_copydata(m, 0, skip + rplen + authsize, (caddr_t)(tc+1)); - - /* Zeroize the authenticator on the packet. */ - m_copyback(m, skip + rplen, authsize, ipseczeroes); - - /* "Massage" the packet headers for crypto processing. */ - error = ah_massage_headers(&m, sav->sah->saidx.dst.sa.sa_family, - skip, ahx->type, 0); - if (error != 0) { - /* NB: mbuf is free'd by ah_massage_headers */ - AHSTAT_INC(ahs_hdrops); - free(tc, M_XDATA); - crypto_freereq(crp); - return error; - } + /* Zeroize the authenticator on the packet. */ + m_copyback(m, skip + rplen, authsize, ipseczeroes); + + /* "Massage" the packet headers for crypto processing. */ + error = ah_massage_headers(&m, sav->sah->saidx.dst.sa.sa_family, + skip, ahx->type, 0); + if (error != 0) { + /* NB: mbuf is free'd by ah_massage_headers */ + AHSTAT_INC(ahs_hdrops); + free(xd, M_XDATA); + crypto_freereq(crp); + return (error); } /* Crypto operation descriptor. 
*/ @@ -708,24 +660,16 @@ ah_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff) crp->crp_flags = CRYPTO_F_IMBUF | CRYPTO_F_CBIFSYNC; crp->crp_buf = (caddr_t) m; crp->crp_callback = ah_input_cb; - crp->crp_sid = sav->tdb_cryptoid; - crp->crp_opaque = (caddr_t) tc; + crp->crp_sid = cryptoid; + crp->crp_opaque = (caddr_t) xd; /* These are passed as-is to the callback. */ - tc->tc_spi = sav->spi; - tc->tc_dst = sav->sah->saidx.dst; - tc->tc_proto = sav->sah->saidx.proto; - tc->tc_nxt = ah->ah_nxt; - tc->tc_protoff = protoff; - tc->tc_skip = skip; - tc->tc_ptr = (caddr_t) mtag; /* Save the mtag we've identified. */ - KEY_ADDREFSA(sav); - tc->tc_sav = sav; - - if (mtag == NULL) - return crypto_dispatch(crp); - else - return ah_input_cb(crp); + xd->sav = sav; + xd->nxt = ah->ah_nxt; + xd->protoff = protoff; + xd->skip = skip; + xd->cryptoid = cryptoid; + return (crypto_dispatch(crp)); } /* @@ -734,49 +678,43 @@ ah_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff) static int ah_input_cb(struct cryptop *crp) { - int rplen, error, skip, protoff; + char buf[IPSEC_ADDRSTRLEN]; unsigned char calc[AH_ALEN_MAX]; + const struct auth_hash *ahx; struct mbuf *m; struct cryptodesc *crd; - struct auth_hash *ahx; - struct tdb_crypto *tc; - struct m_tag *mtag; + struct xform_data *xd; struct secasvar *sav; struct secasindex *saidx; - u_int8_t nxt; caddr_t ptr; - int authsize; + uint64_t cryptoid; + int authsize, rplen, error, skip, protoff; + uint8_t nxt; crd = crp->crp_desc; - - tc = (struct tdb_crypto *) crp->crp_opaque; - IPSEC_ASSERT(tc != NULL, ("null opaque crypto data area!")); - skip = tc->tc_skip; - nxt = tc->tc_nxt; - protoff = tc->tc_protoff; - mtag = (struct m_tag *) tc->tc_ptr; m = (struct mbuf *) crp->crp_buf; - - sav = tc->tc_sav; - IPSEC_ASSERT(sav != NULL, ("null SA!")); - + xd = (struct xform_data *) crp->crp_opaque; + sav = xd->sav; + skip = xd->skip; + nxt = xd->nxt; + protoff = xd->protoff; + cryptoid = xd->cryptoid; saidx = 
&sav->sah->saidx; IPSEC_ASSERT(saidx->dst.sa.sa_family == AF_INET || saidx->dst.sa.sa_family == AF_INET6, ("unexpected protocol family %u", saidx->dst.sa.sa_family)); - ahx = (struct auth_hash *) sav->tdb_authalgxform; + ahx = sav->tdb_authalgxform; /* Check for crypto errors. */ if (crp->crp_etype) { - if (sav->tdb_cryptoid != 0) - sav->tdb_cryptoid = crp->crp_sid; - if (crp->crp_etype == EAGAIN) { - error = crypto_dispatch(crp); - return error; + /* Reset the session ID */ + if (ipsec_updateid(sav, &crp->crp_sid, &cryptoid) != 0) + crypto_freesession(cryptoid); + xd->cryptoid = crp->crp_sid; + return (crypto_dispatch(crp)); } - AHSTAT_INC(ahs_noxform); DPRINTF(("%s: crypto error %d\n", __func__, crp->crp_etype)); error = crp->crp_etype; @@ -802,35 +740,23 @@ ah_input_cb(struct cryptop *crp) /* Copy authenticator off the packet. */ m_copydata(m, skip + rplen, authsize, calc); - /* - * If we have an mtag, we don't need to verify the authenticator -- - * it has been verified by an IPsec-aware NIC. - */ - if (mtag == NULL) { - ptr = (caddr_t) (tc + 1); - - /* Verify authenticator. */ - if (bcmp(ptr + skip + rplen, calc, authsize)) { - DPRINTF(("%s: authentication hash mismatch for packet " - "in SA %s/%08lx\n", __func__, - ipsec_address(&saidx->dst), - (u_long) ntohl(sav->spi))); - AHSTAT_INC(ahs_badauth); - error = EACCES; - goto bad; - } - - /* Fix the Next Protocol field. */ - ((u_int8_t *) ptr)[protoff] = nxt; - - /* Copyback the saved (uncooked) network headers. */ - m_copyback(m, 0, skip, ptr); - } else { - /* Fix the Next Protocol field. */ - m_copyback(m, protoff, sizeof(u_int8_t), &nxt); + /* Verify authenticator. 
*/ + ptr = (caddr_t) (xd + 1); + if (timingsafe_bcmp(ptr + skip + rplen, calc, authsize)) { + DPRINTF(("%s: authentication hash mismatch for packet " + "in SA %s/%08lx\n", __func__, + ipsec_address(&saidx->dst, buf, sizeof(buf)), + (u_long) ntohl(sav->spi))); + AHSTAT_INC(ahs_badauth); + error = EACCES; + goto bad; } + /* Fix the Next Protocol field. */ + ((uint8_t *) ptr)[protoff] = nxt; - free(tc, M_XDATA), tc = NULL; /* No longer needed */ + /* Copyback the saved (uncooked) network headers. */ + m_copyback(m, 0, skip, ptr); + free(xd, M_XDATA), xd = NULL; /* No longer needed */ /* * Header is now authenticated. @@ -845,11 +771,14 @@ ah_input_cb(struct cryptop *crp) m_copydata(m, skip + offsetof(struct newah, ah_seq), sizeof (seq), (caddr_t) &seq); + SECASVAR_LOCK(sav); if (ipsec_updatereplay(ntohl(seq), sav)) { + SECASVAR_UNLOCK(sav); AHSTAT_INC(ahs_replay); - error = ENOBUFS; /*XXX as above*/ + error = EACCES; goto bad; } + SECASVAR_UNLOCK(sav); } /* @@ -858,8 +787,8 @@ ah_input_cb(struct cryptop *crp) error = m_striphdr(m, skip, rplen + authsize); if (error) { DPRINTF(("%s: mangled mbuf chain for SA %s/%08lx\n", __func__, - ipsec_address(&saidx->dst), (u_long) ntohl(sav->spi))); - + ipsec_address(&saidx->dst, buf, sizeof(buf)), + (u_long) ntohl(sav->spi))); AHSTAT_INC(ahs_hdrops); goto bad; } @@ -867,56 +796,50 @@ ah_input_cb(struct cryptop *crp) switch (saidx->dst.sa.sa_family) { #ifdef INET6 case AF_INET6: - error = ipsec6_common_input_cb(m, sav, skip, protoff, mtag); + error = ipsec6_common_input_cb(m, sav, skip, protoff); break; #endif #ifdef INET case AF_INET: - error = ipsec4_common_input_cb(m, sav, skip, protoff, mtag); + error = ipsec4_common_input_cb(m, sav, skip, protoff); break; #endif default: panic("%s: Unexpected address family: %d saidx=%p", __func__, saidx->dst.sa.sa_family, saidx); } - - KEY_FREESAV(&sav); return error; bad: if (sav) - KEY_FREESAV(&sav); + key_freesav(&sav); if (m != NULL) m_freem(m); - if (tc != NULL) - free(tc, M_XDATA); + 
if (xd != NULL) + free(xd, M_XDATA); if (crp != NULL) crypto_freereq(crp); return error; } /* - * AH output routine, called by ipsec[46]_process_packet(). + * AH output routine, called by ipsec[46]_perform_request(). */ static int -ah_output( - struct mbuf *m, - struct ipsecrequest *isr, - struct mbuf **mp, - int skip, - int protoff) +ah_output(struct mbuf *m, struct secpolicy *sp, struct secasvar *sav, + u_int idx, int skip, int protoff) { - struct secasvar *sav; - struct auth_hash *ahx; + char buf[IPSEC_ADDRSTRLEN]; + const struct auth_hash *ahx; struct cryptodesc *crda; - struct tdb_crypto *tc; + struct xform_data *xd; struct mbuf *mi; struct cryptop *crp; - u_int16_t iplen; - int error, rplen, authsize, maxpacketsize, roff; - u_int8_t prot; struct newah *ah; + uint64_t cryptoid; + uint16_t iplen; + int error, rplen, authsize, maxpacketsize, roff; + uint8_t prot; - sav = isr->sav; IPSEC_ASSERT(sav != NULL, ("null SA")); ahx = sav->tdb_authalgxform; IPSEC_ASSERT(ahx != NULL, ("null authentication xform")); @@ -942,7 +865,7 @@ ah_output( DPRINTF(("%s: unknown/unsupported protocol family %u, " "SA %s/%08lx\n", __func__, sav->sah->saidx.dst.sa.sa_family, - ipsec_address(&sav->sah->saidx.dst), + ipsec_address(&sav->sah->saidx.dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi))); AHSTAT_INC(ahs_nopf); error = EPFNOSUPPORT; @@ -952,7 +875,7 @@ ah_output( if (rplen + authsize + m->m_pkthdr.len > maxpacketsize) { DPRINTF(("%s: packet in SA %s/%08lx got too big " "(len %u, max len %u)\n", __func__, - ipsec_address(&sav->sah->saidx.dst), + ipsec_address(&sav->sah->saidx.dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi), rplen + authsize + m->m_pkthdr.len, maxpacketsize)); AHSTAT_INC(ahs_toobig); @@ -966,7 +889,7 @@ ah_output( m = m_unshare(m, M_NOWAIT); if (m == NULL) { DPRINTF(("%s: cannot clone mbuf chain, SA %s/%08lx\n", __func__, - ipsec_address(&sav->sah->saidx.dst), + ipsec_address(&sav->sah->saidx.dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi))); 
AHSTAT_INC(ahs_hdrops); error = ENOBUFS; @@ -979,7 +902,7 @@ ah_output( DPRINTF(("%s: failed to inject %u byte AH header for SA " "%s/%08lx\n", __func__, rplen + authsize, - ipsec_address(&sav->sah->saidx.dst), + ipsec_address(&sav->sah->saidx.dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi))); AHSTAT_INC(ahs_hdrops); /*XXX differs from openbsd */ error = ENOBUFS; @@ -1002,15 +925,16 @@ ah_output( m_copyback(m, skip + rplen, authsize, ipseczeroes); /* Insert packet replay counter, as requested. */ + SECASVAR_LOCK(sav); if (sav->replay) { if (sav->replay->count == ~0 && (sav->flags & SADB_X_EXT_CYCSEQ) == 0) { + SECASVAR_UNLOCK(sav); DPRINTF(("%s: replay counter wrapped for SA %s/%08lx\n", - __func__, - ipsec_address(&sav->sah->saidx.dst), - (u_long) ntohl(sav->spi))); + __func__, ipsec_address(&sav->sah->saidx.dst, buf, + sizeof(buf)), (u_long) ntohl(sav->spi))); AHSTAT_INC(ahs_wrap); - error = EINVAL; + error = EACCES; goto bad; } #ifdef REGRESSION @@ -1020,6 +944,8 @@ ah_output( sav->replay->count++; ah->ah_seq = htonl(sav->replay->count); } + cryptoid = sav->tdb_cryptoid; + SECASVAR_UNLOCK(sav); /* Get crypto descriptors. */ crp = crypto_getreq(1); @@ -1032,7 +958,6 @@ ah_output( } crda = crp->crp_desc; - crda->crd_skip = 0; crda->crd_inject = skip + rplen; crda->crd_len = m->m_pkthdr.len; @@ -1043,18 +968,18 @@ ah_output( crda->crd_klen = _KEYBITS(sav->key_auth); /* Allocate IPsec-specific opaque crypto info. */ - tc = (struct tdb_crypto *) malloc( - sizeof(struct tdb_crypto) + skip, M_XDATA, M_NOWAIT|M_ZERO); - if (tc == NULL) { + xd = malloc(sizeof(struct xform_data) + skip, M_XDATA, + M_NOWAIT | M_ZERO); + if (xd == NULL) { crypto_freereq(crp); - DPRINTF(("%s: failed to allocate tdb_crypto\n", __func__)); + DPRINTF(("%s: failed to allocate xform_data\n", __func__)); AHSTAT_INC(ahs_crypto); error = ENOBUFS; goto bad; } /* Save the skipped portion of the packet. 
*/ - m_copydata(m, 0, skip, (caddr_t) (tc + 1)); + m_copydata(m, 0, skip, (caddr_t) (xd + 1)); /* * Fix IP header length on the header used for @@ -1064,7 +989,7 @@ ah_output( switch (sav->sah->saidx.dst.sa.sa_family) { #ifdef INET case AF_INET: - bcopy(((caddr_t)(tc + 1)) + + bcopy(((caddr_t)(xd + 1)) + offsetof(struct ip, ip_len), (caddr_t) &iplen, sizeof(u_int16_t)); iplen = htons(ntohs(iplen) + rplen + authsize); @@ -1075,29 +1000,29 @@ ah_output( #ifdef INET6 case AF_INET6: - bcopy(((caddr_t)(tc + 1)) + + bcopy(((caddr_t)(xd + 1)) + offsetof(struct ip6_hdr, ip6_plen), - (caddr_t) &iplen, sizeof(u_int16_t)); + (caddr_t) &iplen, sizeof(uint16_t)); iplen = htons(ntohs(iplen) + rplen + authsize); m_copyback(m, offsetof(struct ip6_hdr, ip6_plen), - sizeof(u_int16_t), (caddr_t) &iplen); + sizeof(uint16_t), (caddr_t) &iplen); break; #endif /* INET6 */ } /* Fix the Next Header field in saved header. */ - ((u_int8_t *) (tc + 1))[protoff] = IPPROTO_AH; + ((uint8_t *) (xd + 1))[protoff] = IPPROTO_AH; /* Update the Next Protocol field in the IP header. */ prot = IPPROTO_AH; - m_copyback(m, protoff, sizeof(u_int8_t), (caddr_t) &prot); + m_copyback(m, protoff, sizeof(uint8_t), (caddr_t) &prot); /* "Massage" the packet headers for crypto processing. */ error = ah_massage_headers(&m, sav->sah->saidx.dst.sa.sa_family, skip, ahx->type, 1); if (error != 0) { m = NULL; /* mbuf was free'd by ah_massage_headers. */ - free(tc, M_XDATA); + free(xd, M_XDATA); crypto_freereq(crp); goto bad; } @@ -1107,18 +1032,15 @@ ah_output( crp->crp_flags = CRYPTO_F_IMBUF | CRYPTO_F_CBIFSYNC; crp->crp_buf = (caddr_t) m; crp->crp_callback = ah_output_cb; - crp->crp_sid = sav->tdb_cryptoid; - crp->crp_opaque = (caddr_t) tc; + crp->crp_sid = cryptoid; + crp->crp_opaque = (caddr_t) xd; /* These are passed as-is to the callback. 
*/ - tc->tc_isr = isr; - KEY_ADDREFSA(sav); - tc->tc_sav = sav; - tc->tc_spi = sav->spi; - tc->tc_dst = sav->sah->saidx.dst; - tc->tc_proto = sav->sah->saidx.proto; - tc->tc_skip = skip; - tc->tc_protoff = protoff; + xd->sp = sp; + xd->sav = sav; + xd->skip = skip; + xd->idx = idx; + xd->cryptoid = cryptoid; return crypto_dispatch(crp); bad: @@ -1133,46 +1055,37 @@ bad: static int ah_output_cb(struct cryptop *crp) { - int skip, protoff, error; - struct tdb_crypto *tc; - struct ipsecrequest *isr; + struct xform_data *xd; + struct secpolicy *sp; struct secasvar *sav; struct mbuf *m; + uint64_t cryptoid; caddr_t ptr; - int err; + u_int idx; + int skip, error; - tc = (struct tdb_crypto *) crp->crp_opaque; - IPSEC_ASSERT(tc != NULL, ("null opaque data area!")); - skip = tc->tc_skip; - protoff = tc->tc_protoff; - ptr = (caddr_t) (tc + 1); m = (struct mbuf *) crp->crp_buf; - - isr = tc->tc_isr; - IPSECREQUEST_LOCK(isr); - sav = tc->tc_sav; - /* With the isr lock released SA pointer can be updated. */ - if (sav != isr->sav) { - AHSTAT_INC(ahs_notdb); - DPRINTF(("%s: SA expired while in crypto\n", __func__)); - error = ENOBUFS; /*XXX*/ - goto bad; - } + xd = (struct xform_data *) crp->crp_opaque; + sp = xd->sp; + sav = xd->sav; + skip = xd->skip; + idx = xd->idx; + cryptoid = xd->cryptoid; + ptr = (caddr_t) (xd + 1); /* Check for crypto errors. 
*/ if (crp->crp_etype) { - if (sav->tdb_cryptoid != 0) - sav->tdb_cryptoid = crp->crp_sid; - if (crp->crp_etype == EAGAIN) { - IPSECREQUEST_UNLOCK(isr); - error = crypto_dispatch(crp); - return error; + /* Reset the session ID */ + if (ipsec_updateid(sav, &crp->crp_sid, &cryptoid) != 0) + crypto_freesession(cryptoid); + xd->cryptoid = crp->crp_sid; + return (crypto_dispatch(crp)); } - AHSTAT_INC(ahs_noxform); DPRINTF(("%s: crypto error %d\n", __func__, crp->crp_etype)); error = crp->crp_etype; + m_freem(m); goto bad; } @@ -1183,18 +1096,15 @@ ah_output_cb(struct cryptop *crp) error = EINVAL; goto bad; } - AHSTAT_INC(ahs_hist[sav->alg_auth]); - /* * Copy original headers (with the new protocol number) back * in place. */ m_copyback(m, 0, skip, ptr); - /* No longer needed. */ - free(tc, M_XDATA); + free(xd, M_XDATA); crypto_freereq(crp); - + AHSTAT_INC(ahs_hist[sav->alg_auth]); #ifdef REGRESSION /* Emulate man-in-the-middle attack when ipsec_integrity is TRUE. */ if (V_ipsec_integrity) { @@ -1210,31 +1120,26 @@ ah_output_cb(struct cryptop *crp) #endif /* NB: m is reclaimed by ipsec_process_done. 
*/ - err = ipsec_process_done(m, isr); - KEY_FREESAV(&sav); - IPSECREQUEST_UNLOCK(isr); - return err; + error = ipsec_process_done(m, sp, sav, idx); + return (error); bad: - if (sav) - KEY_FREESAV(&sav); - IPSECREQUEST_UNLOCK(isr); - if (m) - m_freem(m); - free(tc, M_XDATA); + free(xd, M_XDATA); crypto_freereq(crp); - return error; + key_freesav(&sav); + key_freesp(&sp); + return (error); } static struct xformsw ah_xformsw = { - XF_AH, XFT_AUTH, "IPsec AH", - ah_init, ah_zeroize, ah_input, ah_output, + .xf_type = XF_AH, + .xf_name = "IPsec AH", + .xf_init = ah_init, + .xf_zeroize = ah_zeroize, + .xf_input = ah_input, + .xf_output = ah_output, }; -static void -ah_attach(void) -{ - - xform_register(&ah_xformsw); -} - -SYSINIT(ah_xform_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, ah_attach, NULL); +SYSINIT(ah_xform_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, + xform_attach, &ah_xformsw); +SYSUNINIT(ah_xform_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, + xform_detach, &ah_xformsw); diff --git a/freebsd/sys/netipsec/xform_esp.c b/freebsd/sys/netipsec/xform_esp.c index 20790d0d..bf98dc03 100644 --- a/freebsd/sys/netipsec/xform_esp.c +++ b/freebsd/sys/netipsec/xform_esp.c @@ -46,8 +46,12 @@ #include #include #include +#include #include +#include #include +#include +#include #include #include @@ -58,7 +62,6 @@ #include #include -#include #include #include #include @@ -79,50 +82,23 @@ #include VNET_DEFINE(int, esp_enable) = 1; -VNET_DEFINE(struct espstat, espstat); +VNET_PCPUSTAT_DEFINE(struct espstat, espstat); +VNET_PCPUSTAT_SYSINIT(espstat); -SYSCTL_DECL(_net_inet_esp); -SYSCTL_VNET_INT(_net_inet_esp, OID_AUTO, - esp_enable, CTLFLAG_RW, &VNET_NAME(esp_enable), 0, ""); -SYSCTL_VNET_STRUCT(_net_inet_esp, IPSECCTL_STATS, - stats, CTLFLAG_RD, &VNET_NAME(espstat), espstat, ""); +#ifdef VIMAGE +VNET_PCPUSTAT_SYSUNINIT(espstat); +#endif /* VIMAGE */ -static VNET_DEFINE(int, esp_max_ivlen); /* max iv length over all algorithms */ -#define V_esp_max_ivlen VNET(esp_max_ivlen) 
+SYSCTL_DECL(_net_inet_esp); +SYSCTL_INT(_net_inet_esp, OID_AUTO, esp_enable, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(esp_enable), 0, ""); +SYSCTL_VNET_PCPUSTAT(_net_inet_esp, IPSECCTL_STATS, stats, + struct espstat, espstat, + "ESP statistics (struct espstat, netipsec/esp_var.h"); static int esp_input_cb(struct cryptop *op); static int esp_output_cb(struct cryptop *crp); -/* - * NB: this is public for use by the PF_KEY support. - * NB: if you add support here; be sure to add code to esp_attach below! - */ -struct enc_xform * -esp_algorithm_lookup(int alg) -{ - if (alg >= ESP_ALG_MAX) - return NULL; - switch (alg) { - case SADB_EALG_DESCBC: - return &enc_xform_des; - case SADB_EALG_3DESCBC: - return &enc_xform_3des; - case SADB_X_EALG_AES: - return &enc_xform_rijndael128; - case SADB_X_EALG_BLOWFISHCBC: - return &enc_xform_blf; - case SADB_X_EALG_CAST128CBC: - return &enc_xform_cast5; - case SADB_X_EALG_SKIPJACK: - return &enc_xform_skipjack; - case SADB_EALG_NULL: - return &enc_xform_null; - case SADB_X_EALG_CAMELLIACBC: - return &enc_xform_camellia; - } - return NULL; -} - size_t esp_hdrsiz(struct secasvar *sav) { @@ -149,7 +125,7 @@ esp_hdrsiz(struct secasvar *sav) * + sizeof (next header field) * + max icv supported. 
*/ - size = sizeof (struct newesp) + V_esp_max_ivlen + 9 + 16; + size = sizeof (struct newesp) + EALG_MAX_BLOCK_LEN + 9 + 16; } return size; } @@ -160,12 +136,12 @@ esp_hdrsiz(struct secasvar *sav) static int esp_init(struct secasvar *sav, struct xformsw *xsp) { - struct enc_xform *txform; + const struct enc_xform *txform; struct cryptoini cria, crie; int keylen; int error; - txform = esp_algorithm_lookup(sav->alg_enc); + txform = enc_algorithm_lookup(sav->alg_enc); if (txform == NULL) { DPRINTF(("%s: unsupported encryption algorithm %d\n", __func__, sav->alg_enc)); @@ -176,12 +152,14 @@ esp_init(struct secasvar *sav, struct xformsw *xsp) __func__, txform->name)); return EINVAL; } - if ((sav->flags&(SADB_X_EXT_OLD|SADB_X_EXT_IV4B)) == SADB_X_EXT_IV4B) { + if ((sav->flags & (SADB_X_EXT_OLD | SADB_X_EXT_IV4B)) == + SADB_X_EXT_IV4B) { DPRINTF(("%s: 4-byte IV not supported with protocol\n", __func__)); return EINVAL; } - keylen = _KEYLEN(sav->key_enc); + /* subtract off the salt, RFC4106, 8.1 and RFC3686, 5.1 */ + keylen = _KEYLEN(sav->key_enc) - SAV_ISCTRORGCM(sav) * 4; if (txform->minkey > keylen || keylen > txform->maxkey) { DPRINTF(("%s: invalid key length %u, must be in the range " "[%u..%u] for algorithm %s\n", __func__, @@ -190,19 +168,10 @@ esp_init(struct secasvar *sav, struct xformsw *xsp) return EINVAL; } - /* - * NB: The null xform needs a non-zero blocksize to keep the - * crypto code happy but if we use it to set ivlen then - * the ESP header will be processed incorrectly. The - * compromise is to force it to zero here. - */ - sav->ivlen = (txform == &enc_xform_null ? 0 : txform->blocksize); - sav->iv = (caddr_t) malloc(sav->ivlen, M_XDATA, M_WAITOK); - if (sav->iv == NULL) { - DPRINTF(("%s: no memory for IV\n", __func__)); - return EINVAL; - } - key_randomfill(sav->iv, sav->ivlen); /*XXX*/ + if (SAV_ISCTRORGCM(sav)) + sav->ivlen = 8; /* RFC4106 3.1 and RFC3686 3.1 */ + else + sav->ivlen = txform->ivsize; /* * Setup AH-related state. 
@@ -217,12 +186,42 @@ esp_init(struct secasvar *sav, struct xformsw *xsp) sav->tdb_xform = xsp; sav->tdb_encalgxform = txform; + /* + * Whenever AES-GCM is used for encryption, one + * of the AES authentication algorithms is chosen + * as well, based on the key size. + */ + if (sav->alg_enc == SADB_X_EALG_AESGCM16) { + switch (keylen) { + case AES_128_GMAC_KEY_LEN: + sav->alg_auth = SADB_X_AALG_AES128GMAC; + sav->tdb_authalgxform = &auth_hash_nist_gmac_aes_128; + break; + case AES_192_GMAC_KEY_LEN: + sav->alg_auth = SADB_X_AALG_AES192GMAC; + sav->tdb_authalgxform = &auth_hash_nist_gmac_aes_192; + break; + case AES_256_GMAC_KEY_LEN: + sav->alg_auth = SADB_X_AALG_AES256GMAC; + sav->tdb_authalgxform = &auth_hash_nist_gmac_aes_256; + break; + default: + DPRINTF(("%s: invalid key length %u" + "for algorithm %s\n", __func__, + keylen, txform->name)); + return EINVAL; + } + bzero(&cria, sizeof(cria)); + cria.cri_alg = sav->tdb_authalgxform->type; + cria.cri_key = sav->key_enc->key_data; + cria.cri_klen = _KEYBITS(sav->key_enc) - SAV_ISGCM(sav) * 32; + } + /* Initialize crypto session. */ - bzero(&crie, sizeof (crie)); + bzero(&crie, sizeof(crie)); crie.cri_alg = sav->tdb_encalgxform->type; - crie.cri_klen = _KEYBITS(sav->key_enc); crie.cri_key = sav->key_enc->key_data; - /* XXX Rounds ? 
*/ + crie.cri_klen = _KEYBITS(sav->key_enc) - SAV_ISCTRORGCM(sav) * 32; if (sav->tdb_authalgxform && sav->tdb_encalgxform) { /* init both auth & enc */ @@ -255,10 +254,6 @@ esp_zeroize(struct secasvar *sav) if (sav->key_enc) bzero(sav->key_enc->key_data, _KEYLEN(sav->key_enc)); - if (sav->iv) { - free(sav->iv, M_XDATA); - sav->iv = NULL; - } sav->tdb_encalgxform = NULL; sav->tdb_xform = NULL; return error; @@ -270,16 +265,16 @@ esp_zeroize(struct secasvar *sav) static int esp_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff) { - struct auth_hash *esph; - struct enc_xform *espx; - struct tdb_ident *tdbi; - struct tdb_crypto *tc; - int plen, alen, hlen; - struct m_tag *mtag; - struct newesp *esp; - + char buf[128]; + const struct auth_hash *esph; + const struct enc_xform *espx; + struct xform_data *xd; struct cryptodesc *crde; struct cryptop *crp; + struct newesp *esp; + uint8_t *ivp; + uint64_t cryptoid; + int plen, alen, hlen; IPSEC_ASSERT(sav != NULL, ("null SA")); IPSEC_ASSERT(sav->tdb_encalgxform != NULL, ("null encoding xform")); @@ -292,32 +287,19 @@ esp_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff) m_freem(m); return EINVAL; } - /* XXX don't pullup, just copy header */ IP6_EXTHDR_GET(esp, struct newesp *, m, skip, sizeof (struct newesp)); esph = sav->tdb_authalgxform; espx = sav->tdb_encalgxform; - /* Determine the ESP header length */ + /* Determine the ESP header and auth length */ if (sav->flags & SADB_X_EXT_OLD) hlen = sizeof (struct esp) + sav->ivlen; else hlen = sizeof (struct newesp) + sav->ivlen; - /* Authenticator hash size */ - if (esph != NULL) { - switch (esph->type) { - case CRYPTO_SHA2_256_HMAC: - case CRYPTO_SHA2_384_HMAC: - case CRYPTO_SHA2_512_HMAC: - alen = esph->hashsize/2; - break; - default: - alen = AH_HMAC_HASHLEN; - break; - } - }else - alen = 0; + + alen = xform_ah_authsize(esph); /* * Verify payload length is multiple of encryption algorithm @@ -330,10 +312,9 @@ esp_input(struct mbuf *m, struct 
secasvar *sav, int skip, int protoff) plen = m->m_pkthdr.len - (skip + hlen + alen); if ((plen & (espx->blocksize - 1)) || (plen <= 0)) { DPRINTF(("%s: payload of %d octets not a multiple of %d octets," - " SA %s/%08lx\n", __func__, - plen, espx->blocksize, - ipsec_address(&sav->sah->saidx.dst), - (u_long) ntohl(sav->spi))); + " SA %s/%08lx\n", __func__, plen, espx->blocksize, + ipsec_address(&sav->sah->saidx.dst, buf, sizeof(buf)), + (u_long)ntohl(sav->spi))); ESPSTAT_INC(esps_badilen); m_freem(m); return EINVAL; @@ -342,29 +323,23 @@ esp_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff) /* * Check sequence number. */ - if (esph && sav->replay && !ipsec_chkreplay(ntohl(esp->esp_seq), sav)) { - DPRINTF(("%s: packet replay check for %s\n", __func__, - ipsec_logsastr(sav))); /*XXX*/ - ESPSTAT_INC(esps_replay); - m_freem(m); - return ENOBUFS; /*XXX*/ + SECASVAR_LOCK(sav); + if (esph != NULL && sav->replay != NULL && sav->replay->wsize != 0) { + if (ipsec_chkreplay(ntohl(esp->esp_seq), sav) == 0) { + SECASVAR_UNLOCK(sav); + DPRINTF(("%s: packet replay check for %s\n", __func__, + ipsec_sa2str(sav, buf, sizeof(buf)))); + ESPSTAT_INC(esps_replay); + m_freem(m); + return (EACCES); + } } + cryptoid = sav->tdb_cryptoid; + SECASVAR_UNLOCK(sav); /* Update the counters */ ESPSTAT_ADD(esps_ibytes, m->m_pkthdr.len - (skip + hlen + alen)); - /* Find out if we've already done crypto */ - for (mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_CRYPTO_DONE, NULL); - mtag != NULL; - mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_CRYPTO_DONE, mtag)) { - tdbi = (struct tdb_ident *) (mtag + 1); - if (tdbi->proto == sav->sah->saidx.proto && - tdbi->spi == sav->spi && - !bcmp(&tdbi->dst, &sav->sah->saidx.dst, - sizeof(union sockaddr_union))) - break; - } - /* Get crypto descriptors */ crp = crypto_getreq(esph && espx ? 
2 : 1); if (crp == NULL) { @@ -376,40 +351,33 @@ esp_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff) } /* Get IPsec-specific opaque pointer */ - if (esph == NULL || mtag != NULL) - tc = (struct tdb_crypto *) malloc(sizeof(struct tdb_crypto), - M_XDATA, M_NOWAIT|M_ZERO); - else - tc = (struct tdb_crypto *) malloc(sizeof(struct tdb_crypto) + alen, - M_XDATA, M_NOWAIT|M_ZERO); - if (tc == NULL) { - crypto_freereq(crp); - DPRINTF(("%s: failed to allocate tdb_crypto\n", __func__)); + xd = malloc(sizeof(*xd) + alen, M_XDATA, M_NOWAIT | M_ZERO); + if (xd == NULL) { + DPRINTF(("%s: failed to allocate xform_data\n", __func__)); ESPSTAT_INC(esps_crypto); + crypto_freereq(crp); m_freem(m); return ENOBUFS; } - tc->tc_ptr = (caddr_t) mtag; - - if (esph) { + if (esph != NULL) { struct cryptodesc *crda = crp->crp_desc; IPSEC_ASSERT(crda != NULL, ("null ah crypto descriptor")); /* Authentication descriptor */ crda->crd_skip = skip; - crda->crd_len = m->m_pkthdr.len - (skip + alen); + if (SAV_ISGCM(sav)) + crda->crd_len = 8; /* RFC4106 5, SPI + SN */ + else + crda->crd_len = m->m_pkthdr.len - (skip + alen); crda->crd_inject = m->m_pkthdr.len - alen; crda->crd_alg = esph->type; - crda->crd_key = sav->key_auth->key_data; - crda->crd_klen = _KEYBITS(sav->key_auth); /* Copy the authenticator */ - if (mtag == NULL) - m_copydata(m, m->m_pkthdr.len - alen, alen, - (caddr_t) (tc + 1)); + m_copydata(m, m->m_pkthdr.len - alen, alen, + (caddr_t) (xd + 1)); /* Chain authentication request */ crde = crda->crd_next; @@ -422,35 +390,43 @@ esp_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff) crp->crp_flags = CRYPTO_F_IMBUF | CRYPTO_F_CBIFSYNC; crp->crp_buf = (caddr_t) m; crp->crp_callback = esp_input_cb; - crp->crp_sid = sav->tdb_cryptoid; - crp->crp_opaque = (caddr_t) tc; + crp->crp_sid = cryptoid; + crp->crp_opaque = (caddr_t) xd; /* These are passed as-is to the callback */ - tc->tc_spi = sav->spi; - tc->tc_dst = sav->sah->saidx.dst; - tc->tc_proto = 
sav->sah->saidx.proto; - tc->tc_protoff = protoff; - tc->tc_skip = skip; - KEY_ADDREFSA(sav); - tc->tc_sav = sav; + xd->sav = sav; + xd->protoff = protoff; + xd->skip = skip; + xd->cryptoid = cryptoid; /* Decryption descriptor */ - if (espx) { - IPSEC_ASSERT(crde != NULL, ("null esp crypto descriptor")); - crde->crd_skip = skip + hlen; - crde->crd_len = m->m_pkthdr.len - (skip + hlen + alen); - crde->crd_inject = skip + hlen - sav->ivlen; - - crde->crd_alg = espx->type; - crde->crd_key = sav->key_enc->key_data; - crde->crd_klen = _KEYBITS(sav->key_enc); - /* XXX Rounds ? */ + IPSEC_ASSERT(crde != NULL, ("null esp crypto descriptor")); + crde->crd_skip = skip + hlen; + crde->crd_len = m->m_pkthdr.len - (skip + hlen + alen); + crde->crd_inject = skip + hlen - sav->ivlen; + + if (SAV_ISCTRORGCM(sav)) { + ivp = &crde->crd_iv[0]; + + /* GCM IV Format: RFC4106 4 */ + /* CTR IV Format: RFC3686 4 */ + /* Salt is last four bytes of key, RFC4106 8.1 */ + /* Nonce is last four bytes of key, RFC3686 5.1 */ + memcpy(ivp, sav->key_enc->key_data + + _KEYLEN(sav->key_enc) - 4, 4); + + if (SAV_ISCTR(sav)) { + /* Initial block counter is 1, RFC3686 4 */ + be32enc(&ivp[sav->ivlen + 4], 1); + } + + m_copydata(m, skip + hlen - sav->ivlen, sav->ivlen, &ivp[4]); + crde->crd_flags |= CRD_F_IV_EXPLICIT; } - if (mtag == NULL) - return crypto_dispatch(crp); - else - return esp_input_cb(crp); + crde->crd_alg = espx->type; + + return (crypto_dispatch(crp)); } /* @@ -459,50 +435,41 @@ esp_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff) static int esp_input_cb(struct cryptop *crp) { + char buf[128]; u_int8_t lastthree[3], aalg[AH_HMAC_MAXHASHLEN]; - int hlen, skip, protoff, error, alen; + const struct auth_hash *esph; + const struct enc_xform *espx; struct mbuf *m; struct cryptodesc *crd; - struct auth_hash *esph; - struct enc_xform *espx; - struct tdb_crypto *tc; - struct m_tag *mtag; + struct xform_data *xd; struct secasvar *sav; struct secasindex *saidx; caddr_t ptr; + 
uint64_t cryptoid; + int hlen, skip, protoff, error, alen; crd = crp->crp_desc; IPSEC_ASSERT(crd != NULL, ("null crypto descriptor!")); - tc = (struct tdb_crypto *) crp->crp_opaque; - IPSEC_ASSERT(tc != NULL, ("null opaque crypto data area!")); - skip = tc->tc_skip; - protoff = tc->tc_protoff; - mtag = (struct m_tag *) tc->tc_ptr; m = (struct mbuf *) crp->crp_buf; - - sav = tc->tc_sav; - IPSEC_ASSERT(sav != NULL, ("null SA!")); - + xd = (struct xform_data *) crp->crp_opaque; + sav = xd->sav; + skip = xd->skip; + protoff = xd->protoff; + cryptoid = xd->cryptoid; saidx = &sav->sah->saidx; - IPSEC_ASSERT(saidx->dst.sa.sa_family == AF_INET || - saidx->dst.sa.sa_family == AF_INET6, - ("unexpected protocol family %u", saidx->dst.sa.sa_family)); - esph = sav->tdb_authalgxform; espx = sav->tdb_encalgxform; /* Check for crypto errors */ if (crp->crp_etype) { - /* Reset the session ID */ - if (sav->tdb_cryptoid != 0) - sav->tdb_cryptoid = crp->crp_sid; - if (crp->crp_etype == EAGAIN) { - error = crypto_dispatch(crp); - return error; + /* Reset the session ID */ + if (ipsec_updateid(sav, &crp->crp_sid, &cryptoid) != 0) + crypto_freesession(cryptoid); + xd->cryptoid = crp->crp_sid; + return (crypto_dispatch(crp)); } - ESPSTAT_INC(esps_noxform); DPRINTF(("%s: crypto error %d\n", __func__, crp->crp_etype)); error = crp->crp_etype; @@ -520,48 +487,29 @@ esp_input_cb(struct cryptop *crp) /* If authentication was performed, check now. */ if (esph != NULL) { - switch (esph->type) { - case CRYPTO_SHA2_256_HMAC: - case CRYPTO_SHA2_384_HMAC: - case CRYPTO_SHA2_512_HMAC: - alen = esph->hashsize/2; - break; - default: - alen = AH_HMAC_HASHLEN; - break; - } - /* - * If we have a tag, it means an IPsec-aware NIC did - * the verification for us. Otherwise we need to - * check the authentication calculation. 
- */ + alen = xform_ah_authsize(esph); AHSTAT_INC(ahs_hist[sav->alg_auth]); - if (mtag == NULL) { - /* Copy the authenticator from the packet */ - m_copydata(m, m->m_pkthdr.len - alen, - alen, aalg); - - ptr = (caddr_t) (tc + 1); - - /* Verify authenticator */ - if (bcmp(ptr, aalg, alen) != 0) { - DPRINTF(("%s: " - "authentication hash mismatch for packet in SA %s/%08lx\n", - __func__, - ipsec_address(&saidx->dst), - (u_long) ntohl(sav->spi))); - ESPSTAT_INC(esps_badauth); - error = EACCES; - goto bad; - } + /* Copy the authenticator from the packet */ + m_copydata(m, m->m_pkthdr.len - alen, alen, aalg); + ptr = (caddr_t) (xd + 1); + + /* Verify authenticator */ + if (timingsafe_bcmp(ptr, aalg, alen) != 0) { + DPRINTF(("%s: authentication hash mismatch for " + "packet in SA %s/%08lx\n", __func__, + ipsec_address(&saidx->dst, buf, sizeof(buf)), + (u_long) ntohl(sav->spi))); + ESPSTAT_INC(esps_badauth); + error = EACCES; + goto bad; } - + m->m_flags |= M_AUTHIPDGM; /* Remove trailing authenticator */ m_adj(m, -alen); } /* Release the crypto descriptors */ - free(tc, M_XDATA), tc = NULL; + free(xd, M_XDATA), xd = NULL; crypto_freereq(crp), crp = NULL; /* @@ -577,13 +525,16 @@ esp_input_cb(struct cryptop *crp) m_copydata(m, skip + offsetof(struct newesp, esp_seq), sizeof (seq), (caddr_t) &seq); + SECASVAR_LOCK(sav); if (ipsec_updatereplay(ntohl(seq), sav)) { + SECASVAR_UNLOCK(sav); DPRINTF(("%s: packet replay check for %s\n", __func__, - ipsec_logsastr(sav))); + ipsec_sa2str(sav, buf, sizeof(buf)))); ESPSTAT_INC(esps_replay); - error = ENOBUFS; + error = EACCES; goto bad; } + SECASVAR_UNLOCK(sav); } /* Determine the ESP header length */ @@ -597,7 +548,7 @@ esp_input_cb(struct cryptop *crp) if (error) { ESPSTAT_INC(esps_hdrops); DPRINTF(("%s: bad mbuf chain, SA %s/%08lx\n", __func__, - ipsec_address(&sav->sah->saidx.dst), + ipsec_address(&sav->sah->saidx.dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi))); goto bad; } @@ -609,10 +560,10 @@ esp_input_cb(struct cryptop 
*crp) if (lastthree[1] + 2 > m->m_pkthdr.len - skip) { ESPSTAT_INC(esps_badilen); DPRINTF(("%s: invalid padding length %d for %u byte packet " - "in SA %s/%08lx\n", __func__, - lastthree[1], m->m_pkthdr.len - skip, - ipsec_address(&sav->sah->saidx.dst), - (u_long) ntohl(sav->spi))); + "in SA %s/%08lx\n", __func__, lastthree[1], + m->m_pkthdr.len - skip, + ipsec_address(&sav->sah->saidx.dst, buf, sizeof(buf)), + (u_long) ntohl(sav->spi))); error = EINVAL; goto bad; } @@ -622,9 +573,9 @@ esp_input_cb(struct cryptop *crp) if (lastthree[1] != lastthree[0] && lastthree[1] != 0) { ESPSTAT_INC(esps_badenc); DPRINTF(("%s: decryption failed for packet in " - "SA %s/%08lx\n", __func__, - ipsec_address(&sav->sah->saidx.dst), - (u_long) ntohl(sav->spi))); + "SA %s/%08lx\n", __func__, ipsec_address( + &sav->sah->saidx.dst, buf, sizeof(buf)), + (u_long) ntohl(sav->spi))); error = EINVAL; goto bad; } @@ -639,60 +590,52 @@ esp_input_cb(struct cryptop *crp) switch (saidx->dst.sa.sa_family) { #ifdef INET6 case AF_INET6: - error = ipsec6_common_input_cb(m, sav, skip, protoff, mtag); + error = ipsec6_common_input_cb(m, sav, skip, protoff); break; #endif #ifdef INET case AF_INET: - error = ipsec4_common_input_cb(m, sav, skip, protoff, mtag); + error = ipsec4_common_input_cb(m, sav, skip, protoff); break; #endif default: panic("%s: Unexpected address family: %d saidx=%p", __func__, saidx->dst.sa.sa_family, saidx); } - - KEY_FREESAV(&sav); return error; bad: - if (sav) - KEY_FREESAV(&sav); + if (sav != NULL) + key_freesav(&sav); if (m != NULL) m_freem(m); - if (tc != NULL) - free(tc, M_XDATA); + if (xd != NULL) + free(xd, M_XDATA); if (crp != NULL) crypto_freereq(crp); return error; } - /* - * ESP output routine, called by ipsec[46]_process_packet(). + * ESP output routine, called by ipsec[46]_perform_request(). 
*/ static int -esp_output( - struct mbuf *m, - struct ipsecrequest *isr, - struct mbuf **mp, - int skip, - int protoff -) +esp_output(struct mbuf *m, struct secpolicy *sp, struct secasvar *sav, + u_int idx, int skip, int protoff) { - struct enc_xform *espx; - struct auth_hash *esph; - int hlen, rlen, plen, padding, blks, alen, i, roff; - struct mbuf *mo = (struct mbuf *) NULL; - struct tdb_crypto *tc; - struct secasvar *sav; + char buf[IPSEC_ADDRSTRLEN]; + struct cryptodesc *crde = NULL, *crda = NULL; + struct cryptop *crp; + const struct auth_hash *esph; + const struct enc_xform *espx; + struct mbuf *mo = NULL; + struct xform_data *xd; struct secasindex *saidx; unsigned char *pad; - u_int8_t prot; + uint8_t *ivp; + uint64_t cntr, cryptoid; + int hlen, rlen, padding, blks, alen, i, roff; int error, maxpacketsize; + uint8_t prot; - struct cryptodesc *crde = NULL, *crda = NULL; - struct cryptop *crp; - - sav = isr->sav; IPSEC_ASSERT(sav != NULL, ("null SA")); esph = sav->tdb_authalgxform; espx = sav->tdb_encalgxform; @@ -705,28 +648,14 @@ esp_output( rlen = m->m_pkthdr.len - skip; /* Raw payload length. */ /* - * NB: The null encoding transform has a blocksize of 4 - * so that headers are properly aligned. + * RFC4303 2.4 Requires 4 byte alignment. */ - blks = espx->blocksize; /* IV blocksize */ + blks = MAX(4, espx->blocksize); /* Cipher blocksize */ /* XXX clamp padding length a la KAME??? */ padding = ((blks - ((rlen + 2) % blks)) % blks) + 2; - plen = rlen + padding; /* Padded payload length. 
*/ - - if (esph) - switch (esph->type) { - case CRYPTO_SHA2_256_HMAC: - case CRYPTO_SHA2_384_HMAC: - case CRYPTO_SHA2_512_HMAC: - alen = esph->hashsize/2; - break; - default: - alen = AH_HMAC_HASHLEN; - break; - } - else - alen = 0; + + alen = xform_ah_authsize(esph); ESPSTAT_INC(esps_output); @@ -746,16 +675,20 @@ esp_output( default: DPRINTF(("%s: unknown/unsupported protocol " "family %d, SA %s/%08lx\n", __func__, - saidx->dst.sa.sa_family, ipsec_address(&saidx->dst), - (u_long) ntohl(sav->spi))); + saidx->dst.sa.sa_family, ipsec_address(&saidx->dst, + buf, sizeof(buf)), (u_long) ntohl(sav->spi))); ESPSTAT_INC(esps_nopf); error = EPFNOSUPPORT; goto bad; } + /* + DPRINTF(("%s: skip %d hlen %d rlen %d padding %d alen %d blksd %d\n", + __func__, skip, hlen, rlen, padding, alen, blks)); */ if (skip + hlen + rlen + padding + alen > maxpacketsize) { DPRINTF(("%s: packet in SA %s/%08lx got too big " "(len %u, max len %u)\n", __func__, - ipsec_address(&saidx->dst), (u_long) ntohl(sav->spi), + ipsec_address(&saidx->dst, buf, sizeof(buf)), + (u_long) ntohl(sav->spi), skip + hlen + rlen + padding + alen, maxpacketsize)); ESPSTAT_INC(esps_toobig); error = EMSGSIZE; @@ -768,7 +701,8 @@ esp_output( m = m_unshare(m, M_NOWAIT); if (m == NULL) { DPRINTF(("%s: cannot clone mbuf chain, SA %s/%08lx\n", __func__, - ipsec_address(&saidx->dst), (u_long) ntohl(sav->spi))); + ipsec_address(&saidx->dst, buf, sizeof(buf)), + (u_long) ntohl(sav->spi))); ESPSTAT_INC(esps_hdrops); error = ENOBUFS; goto bad; @@ -778,17 +712,19 @@ esp_output( mo = m_makespace(m, skip, hlen, &roff); if (mo == NULL) { DPRINTF(("%s: %u byte ESP hdr inject failed for SA %s/%08lx\n", - __func__, hlen, ipsec_address(&saidx->dst), - (u_long) ntohl(sav->spi))); - ESPSTAT_INC(esps_hdrops); /* XXX diffs from openbsd */ + __func__, hlen, ipsec_address(&saidx->dst, buf, + sizeof(buf)), (u_long) ntohl(sav->spi))); + ESPSTAT_INC(esps_hdrops); /* XXX diffs from openbsd */ error = ENOBUFS; goto bad; } /* Initialize ESP 
header. */ - bcopy((caddr_t) &sav->spi, mtod(mo, caddr_t) + roff, sizeof(u_int32_t)); + bcopy((caddr_t) &sav->spi, mtod(mo, caddr_t) + roff, + sizeof(uint32_t)); + SECASVAR_LOCK(sav); if (sav->replay) { - u_int32_t replay; + uint32_t replay; #ifdef REGRESSION /* Emulate replay attack when ipsec_replay is TRUE. */ @@ -796,10 +732,14 @@ esp_output( #endif sav->replay->count++; replay = htonl(sav->replay->count); - bcopy((caddr_t) &replay, - mtod(mo, caddr_t) + roff + sizeof(u_int32_t), - sizeof(u_int32_t)); + + bcopy((caddr_t) &replay, mtod(mo, caddr_t) + roff + + sizeof(uint32_t), sizeof(uint32_t)); } + cryptoid = sav->tdb_cryptoid; + if (SAV_ISCTRORGCM(sav)) + cntr = sav->cntr++; + SECASVAR_UNLOCK(sav); /* * Add padding -- better to do it ourselves than use the crypto engine, @@ -808,7 +748,8 @@ esp_output( pad = (u_char *) m_pad(m, padding + alen); if (pad == NULL) { DPRINTF(("%s: m_pad failed for SA %s/%08lx\n", __func__, - ipsec_address(&saidx->dst), (u_long) ntohl(sav->spi))); + ipsec_address(&saidx->dst, buf, sizeof(buf)), + (u_long) ntohl(sav->spi))); m = NULL; /* NB: free'd by m_pad */ error = ENOBUFS; goto bad; @@ -840,7 +781,7 @@ esp_output( m_copyback(m, protoff, sizeof(u_int8_t), (u_char *) &prot); /* Get crypto descriptors. */ - crp = crypto_getreq(esph && espx ? 2 : 1); + crp = crypto_getreq(esph != NULL ? 2 : 1); if (crp == NULL) { DPRINTF(("%s: failed to acquire crypto descriptors\n", __func__)); @@ -849,61 +790,70 @@ esp_output( goto bad; } - if (espx) { - crde = crp->crp_desc; - crda = crde->crd_next; - - /* Encryption descriptor. */ - crde->crd_skip = skip + hlen; - crde->crd_len = m->m_pkthdr.len - (skip + hlen + alen); - crde->crd_flags = CRD_F_ENCRYPT; - crde->crd_inject = skip + hlen - sav->ivlen; - - /* Encryption operation. */ - crde->crd_alg = espx->type; - crde->crd_key = sav->key_enc->key_data; - crde->crd_klen = _KEYBITS(sav->key_enc); - /* XXX Rounds ? */ - } else - crda = crp->crp_desc; - /* IPsec-specific opaque crypto info. 
*/ - tc = (struct tdb_crypto *) malloc(sizeof(struct tdb_crypto), - M_XDATA, M_NOWAIT|M_ZERO); - if (tc == NULL) { + xd = malloc(sizeof(struct xform_data), M_XDATA, M_NOWAIT | M_ZERO); + if (xd == NULL) { crypto_freereq(crp); - DPRINTF(("%s: failed to allocate tdb_crypto\n", __func__)); + DPRINTF(("%s: failed to allocate xform_data\n", __func__)); ESPSTAT_INC(esps_crypto); error = ENOBUFS; goto bad; } + crde = crp->crp_desc; + crda = crde->crd_next; + + /* Encryption descriptor. */ + crde->crd_skip = skip + hlen; + crde->crd_len = m->m_pkthdr.len - (skip + hlen + alen); + crde->crd_flags = CRD_F_ENCRYPT; + crde->crd_inject = skip + hlen - sav->ivlen; + + /* Encryption operation. */ + crde->crd_alg = espx->type; + if (SAV_ISCTRORGCM(sav)) { + ivp = &crde->crd_iv[0]; + + /* GCM IV Format: RFC4106 4 */ + /* CTR IV Format: RFC3686 4 */ + /* Salt is last four bytes of key, RFC4106 8.1 */ + /* Nonce is last four bytes of key, RFC3686 5.1 */ + memcpy(ivp, sav->key_enc->key_data + + _KEYLEN(sav->key_enc) - 4, 4); + be64enc(&ivp[4], cntr); + if (SAV_ISCTR(sav)) { + /* Initial block counter is 1, RFC3686 4 */ + /* XXXAE: should we use this only for first packet? */ + be32enc(&ivp[sav->ivlen + 4], 1); + } + + m_copyback(m, skip + hlen - sav->ivlen, sav->ivlen, &ivp[4]); + crde->crd_flags |= CRD_F_IV_EXPLICIT|CRD_F_IV_PRESENT; + } + /* Callback parameters */ - tc->tc_isr = isr; - KEY_ADDREFSA(sav); - tc->tc_sav = sav; - tc->tc_spi = sav->spi; - tc->tc_dst = saidx->dst; - tc->tc_proto = saidx->proto; + xd->sp = sp; + xd->sav = sav; + xd->idx = idx; + xd->cryptoid = cryptoid; /* Crypto operation descriptor. */ crp->crp_ilen = m->m_pkthdr.len; /* Total input length. */ crp->crp_flags = CRYPTO_F_IMBUF | CRYPTO_F_CBIFSYNC; crp->crp_buf = (caddr_t) m; crp->crp_callback = esp_output_cb; - crp->crp_opaque = (caddr_t) tc; - crp->crp_sid = sav->tdb_cryptoid; + crp->crp_opaque = (caddr_t) xd; + crp->crp_sid = cryptoid; if (esph) { /* Authentication descriptor. 
*/ + crda->crd_alg = esph->type; crda->crd_skip = skip; - crda->crd_len = m->m_pkthdr.len - (skip + alen); + if (SAV_ISGCM(sav)) + crda->crd_len = 8; /* RFC4106 5, SPI + SN */ + else + crda->crd_len = m->m_pkthdr.len - (skip + alen); crda->crd_inject = m->m_pkthdr.len - alen; - - /* Authentication operation. */ - crda->crd_alg = esph->type; - crda->crd_key = sav->key_auth->key_data; - crda->crd_klen = _KEYBITS(sav->key_auth); } return crypto_dispatch(crp); @@ -912,51 +862,40 @@ bad: m_freem(m); return (error); } - /* * ESP output callback from the crypto driver. */ static int esp_output_cb(struct cryptop *crp) { - struct tdb_crypto *tc; - struct ipsecrequest *isr; + struct xform_data *xd; + struct secpolicy *sp; struct secasvar *sav; struct mbuf *m; - int err, error; + uint64_t cryptoid; + u_int idx; + int error; - tc = (struct tdb_crypto *) crp->crp_opaque; - IPSEC_ASSERT(tc != NULL, ("null opaque data area!")); + xd = (struct xform_data *) crp->crp_opaque; m = (struct mbuf *) crp->crp_buf; - - isr = tc->tc_isr; - IPSECREQUEST_LOCK(isr); - sav = tc->tc_sav; - /* With the isr lock released SA pointer can be updated. */ - if (sav != isr->sav) { - ESPSTAT_INC(esps_notdb); - DPRINTF(("%s: SA gone during crypto (SA %s/%08lx proto %u)\n", - __func__, ipsec_address(&tc->tc_dst), - (u_long) ntohl(tc->tc_spi), tc->tc_proto)); - error = ENOBUFS; /*XXX*/ - goto bad; - } + sp = xd->sp; + sav = xd->sav; + idx = xd->idx; + cryptoid = xd->cryptoid; /* Check for crypto errors. */ if (crp->crp_etype) { - /* Reset session ID. 
*/ - if (sav->tdb_cryptoid != 0) - sav->tdb_cryptoid = crp->crp_sid; - if (crp->crp_etype == EAGAIN) { - IPSECREQUEST_UNLOCK(isr); - error = crypto_dispatch(crp); - return error; + /* Reset the session ID */ + if (ipsec_updateid(sav, &crp->crp_sid, &cryptoid) != 0) + crypto_freesession(cryptoid); + xd->cryptoid = crp->crp_sid; + return (crypto_dispatch(crp)); } - ESPSTAT_INC(esps_noxform); DPRINTF(("%s: crypto error %d\n", __func__, crp->crp_etype)); error = crp->crp_etype; + m_freem(m); goto bad; } @@ -967,19 +906,17 @@ esp_output_cb(struct cryptop *crp) error = EINVAL; goto bad; } + free(xd, M_XDATA); + crypto_freereq(crp); ESPSTAT_INC(esps_hist[sav->alg_enc]); if (sav->tdb_authalgxform != NULL) AHSTAT_INC(ahs_hist[sav->alg_auth]); - /* Release crypto descriptors. */ - free(tc, M_XDATA); - crypto_freereq(crp); - #ifdef REGRESSION /* Emulate man-in-the-middle attack when ipsec_integrity is TRUE. */ if (V_ipsec_integrity) { static unsigned char ipseczeroes[AH_HMAC_MAXHASHLEN]; - struct auth_hash *esph; + const struct auth_hash *esph; /* * Corrupt HMAC if we want to test integrity verification of @@ -989,16 +926,7 @@ esp_output_cb(struct cryptop *crp) if (esph != NULL) { int alen; - switch (esph->type) { - case CRYPTO_SHA2_256_HMAC: - case CRYPTO_SHA2_384_HMAC: - case CRYPTO_SHA2_512_HMAC: - alen = esph->hashsize/2; - break; - default: - alen = AH_HMAC_HASHLEN; - break; - } + alen = xform_ah_authsize(esph); m_copyback(m, m->m_pkthdr.len - alen, alen, ipseczeroes); } @@ -1006,44 +934,26 @@ esp_output_cb(struct cryptop *crp) #endif /* NB: m is reclaimed by ipsec_process_done. 
*/ - err = ipsec_process_done(m, isr); - KEY_FREESAV(&sav); - IPSECREQUEST_UNLOCK(isr); - return err; + error = ipsec_process_done(m, sp, sav, idx); + return (error); bad: - if (sav) - KEY_FREESAV(&sav); - IPSECREQUEST_UNLOCK(isr); - if (m) - m_freem(m); - free(tc, M_XDATA); + free(xd, M_XDATA); crypto_freereq(crp); - return error; + key_freesav(&sav); + key_freesp(&sp); + return (error); } static struct xformsw esp_xformsw = { - XF_ESP, XFT_CONF|XFT_AUTH, "IPsec ESP", - esp_init, esp_zeroize, esp_input, - esp_output + .xf_type = XF_ESP, + .xf_name = "IPsec ESP", + .xf_init = esp_init, + .xf_zeroize = esp_zeroize, + .xf_input = esp_input, + .xf_output = esp_output, }; -static void -esp_attach(void) -{ -#define MAXIV(xform) \ - if (xform.blocksize > V_esp_max_ivlen) \ - V_esp_max_ivlen = xform.blocksize \ - - MAXIV(enc_xform_des); /* SADB_EALG_DESCBC */ - MAXIV(enc_xform_3des); /* SADB_EALG_3DESCBC */ - MAXIV(enc_xform_rijndael128); /* SADB_X_EALG_AES */ - MAXIV(enc_xform_blf); /* SADB_X_EALG_BLOWFISHCBC */ - MAXIV(enc_xform_cast5); /* SADB_X_EALG_CAST128CBC */ - MAXIV(enc_xform_skipjack); /* SADB_X_EALG_SKIPJACK */ - MAXIV(enc_xform_null); /* SADB_EALG_NULL */ - MAXIV(enc_xform_camellia); /* SADB_X_EALG_CAMELLIACBC */ - - xform_register(&esp_xformsw); -#undef MAXIV -} -SYSINIT(esp_xform_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, esp_attach, NULL); +SYSINIT(esp_xform_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, + xform_attach, &esp_xformsw); +SYSUNINIT(esp_xform_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, + xform_detach, &esp_xformsw); diff --git a/freebsd/sys/netipsec/xform_ipcomp.c b/freebsd/sys/netipsec/xform_ipcomp.c index 2478c948..f5a7aad4 100644 --- a/freebsd/sys/netipsec/xform_ipcomp.c +++ b/freebsd/sys/netipsec/xform_ipcomp.c @@ -39,7 +39,6 @@ #include #include #include -#include #include #include #include @@ -49,8 +48,9 @@ #include #include #include +#include -#include +#include #include #include @@ -58,6 +58,7 @@ #ifdef INET6 #include +#include #include 
#endif @@ -72,27 +73,76 @@ #include VNET_DEFINE(int, ipcomp_enable) = 1; -VNET_DEFINE(struct ipcompstat, ipcompstat); +VNET_PCPUSTAT_DEFINE(struct ipcompstat, ipcompstat); +VNET_PCPUSTAT_SYSINIT(ipcompstat); + +#ifdef VIMAGE +VNET_PCPUSTAT_SYSUNINIT(ipcompstat); +#endif /* VIMAGE */ SYSCTL_DECL(_net_inet_ipcomp); -SYSCTL_VNET_INT(_net_inet_ipcomp, OID_AUTO, - ipcomp_enable, CTLFLAG_RW, &VNET_NAME(ipcomp_enable), 0, ""); -SYSCTL_VNET_STRUCT(_net_inet_ipcomp, IPSECCTL_STATS, - stats, CTLFLAG_RD, &VNET_NAME(ipcompstat), ipcompstat, ""); +SYSCTL_INT(_net_inet_ipcomp, OID_AUTO, ipcomp_enable, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ipcomp_enable), 0, ""); +SYSCTL_VNET_PCPUSTAT(_net_inet_ipcomp, IPSECCTL_STATS, stats, + struct ipcompstat, ipcompstat, + "IPCOMP statistics (struct ipcompstat, netipsec/ipcomp_var.h"); static int ipcomp_input_cb(struct cryptop *crp); static int ipcomp_output_cb(struct cryptop *crp); -struct comp_algo * -ipcomp_algorithm_lookup(int alg) +/* + * RFC 3173 p 2.2. Non-Expansion Policy: + * If the total size of a compressed payload and the IPComp header, as + * defined in section 3, is not smaller than the size of the original + * payload, the IP datagram MUST be sent in the original non-compressed + * form. + * + * When we use IPComp in tunnel mode, for small packets we will receive + * encapsulated IP-IP datagrams without any compression and without IPComp + * header. 
+ */ +static int +ipcomp_encapcheck(union sockaddr_union *src, union sockaddr_union *dst) +{ + struct secasvar *sav; + + sav = key_allocsa_tunnel(src, dst, IPPROTO_IPCOMP); + if (sav == NULL) + return (0); + key_freesav(&sav); + + if (src->sa.sa_family == AF_INET) + return (sizeof(struct in_addr) << 4); + else + return (sizeof(struct in6_addr) << 4); +} + +static int +ipcomp_nonexp_input(struct mbuf **mp, int *offp, int proto) { - if (alg >= IPCOMP_ALG_MAX) - return NULL; - switch (alg) { - case SADB_X_CALG_DEFLATE: - return &comp_algo_deflate; + int isr; + + switch (proto) { +#ifdef INET + case IPPROTO_IPV4: + isr = NETISR_IP; + break; +#endif +#ifdef INET6 + case IPPROTO_IPV6: + isr = NETISR_IPV6; + break; +#endif + default: + IPCOMPSTAT_INC(ipcomps_nopf); + m_freem(*mp); + return (IPPROTO_DONE); } - return NULL; + m_adj(*mp, *offp); + IPCOMPSTAT_ADD(ipcomps_ibytes, (*mp)->m_pkthdr.len); + IPCOMPSTAT_INC(ipcomps_input); + netisr_dispatch(isr, *mp); + return (IPPROTO_DONE); } /* @@ -101,11 +151,11 @@ ipcomp_algorithm_lookup(int alg) static int ipcomp_init(struct secasvar *sav, struct xformsw *xsp) { - struct comp_algo *tcomp; + const struct comp_algo *tcomp; struct cryptoini cric; /* NB: algorithm really comes in alg_enc and not alg_comp! 
*/ - tcomp = ipcomp_algorithm_lookup(sav->alg_enc); + tcomp = comp_algorithm_lookup(sav->alg_enc); if (tcomp == NULL) { DPRINTF(("%s: unsupported compression algorithm %d\n", __func__, sav->alg_comp)); @@ -141,7 +191,7 @@ ipcomp_zeroize(struct secasvar *sav) static int ipcomp_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff) { - struct tdb_crypto *tc; + struct xform_data *xd; struct cryptodesc *crdc; struct cryptop *crp; struct ipcomp *ipcomp; @@ -176,12 +226,12 @@ ipcomp_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff) return ENOBUFS; } /* Get IPsec-specific opaque pointer */ - tc = (struct tdb_crypto *) malloc(sizeof (*tc), M_XDATA, M_NOWAIT|M_ZERO); - if (tc == NULL) { - m_freem(m); - crypto_freereq(crp); - DPRINTF(("%s: cannot allocate tdb_crypto\n", __func__)); + xd = malloc(sizeof(*xd), M_XDATA, M_NOWAIT | M_ZERO); + if (xd == NULL) { + DPRINTF(("%s: cannot allocate xform_data\n", __func__)); IPCOMPSTAT_INC(ipcomps_crypto); + crypto_freereq(crp); + m_freem(m); return ENOBUFS; } crdc = crp->crp_desc; @@ -190,27 +240,25 @@ ipcomp_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff) crdc->crd_len = m->m_pkthdr.len - (skip + hlen); crdc->crd_inject = skip; - tc->tc_ptr = 0; - /* Decompression operation */ crdc->crd_alg = sav->tdb_compalgxform->type; + /* Crypto operation descriptor */ crp->crp_ilen = m->m_pkthdr.len - (skip + hlen); crp->crp_flags = CRYPTO_F_IMBUF | CRYPTO_F_CBIFSYNC; crp->crp_buf = (caddr_t) m; crp->crp_callback = ipcomp_input_cb; - crp->crp_sid = sav->tdb_cryptoid; - crp->crp_opaque = (caddr_t) tc; + crp->crp_opaque = (caddr_t) xd; /* These are passed as-is to the callback */ - tc->tc_spi = sav->spi; - tc->tc_dst = sav->sah->saidx.dst; - tc->tc_proto = sav->sah->saidx.proto; - tc->tc_protoff = protoff; - tc->tc_skip = skip; - KEY_ADDREFSA(sav); - tc->tc_sav = sav; + xd->sav = sav; + xd->protoff = protoff; + xd->skip = skip; + + SECASVAR_LOCK(sav); + crp->crp_sid = xd->cryptoid = 
sav->tdb_cryptoid; + SECASVAR_UNLOCK(sav); return crypto_dispatch(crp); } @@ -221,29 +269,26 @@ ipcomp_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff) static int ipcomp_input_cb(struct cryptop *crp) { + char buf[IPSEC_ADDRSTRLEN]; struct cryptodesc *crd; - struct tdb_crypto *tc; - int skip, protoff; - struct mtag *mtag; + struct xform_data *xd; struct mbuf *m; struct secasvar *sav; struct secasindex *saidx; - int hlen = IPCOMP_HLENGTH, error, clen; - u_int8_t nproto; caddr_t addr; + uint64_t cryptoid; + int hlen = IPCOMP_HLENGTH, error, clen; + int skip, protoff; + uint8_t nproto; crd = crp->crp_desc; - tc = (struct tdb_crypto *) crp->crp_opaque; - IPSEC_ASSERT(tc != NULL, ("null opaque crypto data area!")); - skip = tc->tc_skip; - protoff = tc->tc_protoff; - mtag = (struct mtag *) tc->tc_ptr; m = (struct mbuf *) crp->crp_buf; - - sav = tc->tc_sav; - IPSEC_ASSERT(sav != NULL, ("null SA!")); - + xd = (struct xform_data *) crp->crp_opaque; + sav = xd->sav; + skip = xd->skip; + protoff = xd->protoff; + cryptoid = xd->cryptoid; saidx = &sav->sah->saidx; IPSEC_ASSERT(saidx->dst.sa.sa_family == AF_INET || saidx->dst.sa.sa_family == AF_INET6, @@ -251,12 +296,12 @@ ipcomp_input_cb(struct cryptop *crp) /* Check for crypto errors */ if (crp->crp_etype) { - /* Reset the session ID */ - if (sav->tdb_cryptoid != 0) - sav->tdb_cryptoid = crp->crp_sid; - if (crp->crp_etype == EAGAIN) { - return crypto_dispatch(crp); + /* Reset the session ID */ + if (ipsec_updateid(sav, &crp->crp_sid, &cryptoid) != 0) + crypto_freesession(cryptoid); + xd->cryptoid = crp->crp_sid; + return (crypto_dispatch(crp)); } IPCOMPSTAT_INC(ipcomps_noxform); DPRINTF(("%s: crypto error %d\n", __func__, crp->crp_etype)); @@ -275,13 +320,13 @@ ipcomp_input_cb(struct cryptop *crp) clen = crp->crp_olen; /* Length of data after processing */ /* Release the crypto descriptors */ - free(tc, M_XDATA), tc = NULL; + free(xd, M_XDATA), xd = NULL; crypto_freereq(crp), crp = NULL; /* In case it's not 
done already, adjust the size of the mbuf chain */ m->m_pkthdr.len = clen + hlen + skip; - if (m->m_len < skip + hlen && (m = m_pullup(m, skip + hlen)) == 0) { + if (m->m_len < skip + hlen && (m = m_pullup(m, skip + hlen)) == NULL) { IPCOMPSTAT_INC(ipcomps_hdrops); /*XXX*/ DPRINTF(("%s: m_pullup failed\n", __func__)); error = EINVAL; /*XXX*/ @@ -297,8 +342,8 @@ ipcomp_input_cb(struct cryptop *crp) if (error) { IPCOMPSTAT_INC(ipcomps_hdrops); DPRINTF(("%s: bad mbuf chain, IPCA %s/%08lx\n", __func__, - ipsec_address(&sav->sah->saidx.dst), - (u_long) ntohl(sav->spi))); + ipsec_address(&sav->sah->saidx.dst, buf, sizeof(buf)), + (u_long) ntohl(sav->spi))); goto bad; } @@ -308,53 +353,45 @@ ipcomp_input_cb(struct cryptop *crp) switch (saidx->dst.sa.sa_family) { #ifdef INET6 case AF_INET6: - error = ipsec6_common_input_cb(m, sav, skip, protoff, NULL); + error = ipsec6_common_input_cb(m, sav, skip, protoff); break; #endif #ifdef INET case AF_INET: - error = ipsec4_common_input_cb(m, sav, skip, protoff, NULL); + error = ipsec4_common_input_cb(m, sav, skip, protoff); break; #endif default: panic("%s: Unexpected address family: %d saidx=%p", __func__, saidx->dst.sa.sa_family, saidx); } - - KEY_FREESAV(&sav); return error; bad: - if (sav) - KEY_FREESAV(&sav); - if (m) + if (sav != NULL) + key_freesav(&sav); + if (m != NULL) m_freem(m); - if (tc != NULL) - free(tc, M_XDATA); - if (crp) + if (xd != NULL) + free(xd, M_XDATA); + if (crp != NULL) crypto_freereq(crp); return error; } /* - * IPComp output routine, called by ipsec[46]_process_packet() + * IPComp output routine, called by ipsec[46]_perform_request() */ static int -ipcomp_output( - struct mbuf *m, - struct ipsecrequest *isr, - struct mbuf **mp, - int skip, - int protoff -) +ipcomp_output(struct mbuf *m, struct secpolicy *sp, struct secasvar *sav, + u_int idx, int skip, int protoff) { - struct secasvar *sav; - struct comp_algo *ipcompx; - int error, ralen, maxpacketsize; + char buf[IPSEC_ADDRSTRLEN]; + const struct 
comp_algo *ipcompx; struct cryptodesc *crdc; struct cryptop *crp; - struct tdb_crypto *tc; + struct xform_data *xd; + int error, ralen, maxpacketsize; - sav = isr->sav; IPSEC_ASSERT(sav != NULL, ("null SA")); ipcompx = sav->tdb_compalgxform; IPSEC_ASSERT(ipcompx != NULL, ("null compression xform")); @@ -367,7 +404,7 @@ ipcomp_output( */ if (m->m_pkthdr.len <= ipcompx->minlen) { IPCOMPSTAT_INC(ipcomps_threshold); - return ipsec_process_done(m, isr); + return ipsec_process_done(m, sp, sav, idx); } ralen = m->m_pkthdr.len - skip; /* Raw payload length before comp. */ @@ -390,7 +427,7 @@ ipcomp_output( DPRINTF(("%s: unknown/unsupported protocol family %d, " "IPCA %s/%08lx\n", __func__, sav->sah->saidx.dst.sa.sa_family, - ipsec_address(&sav->sah->saidx.dst), + ipsec_address(&sav->sah->saidx.dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi))); error = EPFNOSUPPORT; goto bad; @@ -399,7 +436,7 @@ ipcomp_output( IPCOMPSTAT_INC(ipcomps_toobig); DPRINTF(("%s: packet in IPCA %s/%08lx got too big " "(len %u, max len %u)\n", __func__, - ipsec_address(&sav->sah->saidx.dst), + ipsec_address(&sav->sah->saidx.dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi), ralen + skip + IPCOMP_HLENGTH, maxpacketsize)); error = EMSGSIZE; @@ -413,8 +450,8 @@ ipcomp_output( if (m == NULL) { IPCOMPSTAT_INC(ipcomps_hdrops); DPRINTF(("%s: cannot clone mbuf chain, IPCA %s/%08lx\n", - __func__, ipsec_address(&sav->sah->saidx.dst), - (u_long) ntohl(sav->spi))); + __func__, ipsec_address(&sav->sah->saidx.dst, buf, + sizeof(buf)), (u_long) ntohl(sav->spi))); error = ENOBUFS; goto bad; } @@ -441,32 +478,31 @@ ipcomp_output( crdc->crd_alg = ipcompx->type; /* IPsec-specific opaque crypto info */ - tc = (struct tdb_crypto *) malloc(sizeof(struct tdb_crypto), - M_XDATA, M_NOWAIT|M_ZERO); - if (tc == NULL) { + xd = malloc(sizeof(struct xform_data), M_XDATA, M_NOWAIT | M_ZERO); + if (xd == NULL) { IPCOMPSTAT_INC(ipcomps_crypto); - DPRINTF(("%s: failed to allocate tdb_crypto\n", __func__)); + DPRINTF(("%s: failed to 
allocate xform_data\n", __func__)); crypto_freereq(crp); error = ENOBUFS; goto bad; } - tc->tc_isr = isr; - KEY_ADDREFSA(sav); - tc->tc_sav = sav; - tc->tc_spi = sav->spi; - tc->tc_dst = sav->sah->saidx.dst; - tc->tc_proto = sav->sah->saidx.proto; - tc->tc_protoff = protoff; - tc->tc_skip = skip; + xd->sp = sp; + xd->sav = sav; + xd->idx = idx; + xd->skip = skip; + xd->protoff = protoff; /* Crypto operation descriptor */ crp->crp_ilen = m->m_pkthdr.len; /* Total input length */ crp->crp_flags = CRYPTO_F_IMBUF | CRYPTO_F_CBIFSYNC; crp->crp_buf = (caddr_t) m; crp->crp_callback = ipcomp_output_cb; - crp->crp_opaque = (caddr_t) tc; - crp->crp_sid = sav->tdb_cryptoid; + crp->crp_opaque = (caddr_t) xd; + + SECASVAR_LOCK(sav); + crp->crp_sid = xd->cryptoid = sav->tdb_cryptoid; + SECASVAR_UNLOCK(sav); return crypto_dispatch(crp); bad: @@ -481,37 +517,32 @@ bad: static int ipcomp_output_cb(struct cryptop *crp) { - struct tdb_crypto *tc; - struct ipsecrequest *isr; + char buf[IPSEC_ADDRSTRLEN]; + struct xform_data *xd; + struct secpolicy *sp; struct secasvar *sav; struct mbuf *m; - int error, skip; + uint64_t cryptoid; + u_int idx; + int error, skip, protoff; - tc = (struct tdb_crypto *) crp->crp_opaque; - IPSEC_ASSERT(tc != NULL, ("null opaque data area!")); m = (struct mbuf *) crp->crp_buf; - skip = tc->tc_skip; - - isr = tc->tc_isr; - IPSECREQUEST_LOCK(isr); - sav = tc->tc_sav; - /* With the isr lock released SA pointer can be updated. 
*/ - if (sav != isr->sav) { - IPCOMPSTAT_INC(ipcomps_notdb); - DPRINTF(("%s: SA expired while in crypto\n", __func__)); - error = ENOBUFS; /*XXX*/ - goto bad; - } + xd = (struct xform_data *) crp->crp_opaque; + idx = xd->idx; + sp = xd->sp; + sav = xd->sav; + skip = xd->skip; + protoff = xd->protoff; + cryptoid = xd->cryptoid; /* Check for crypto errors */ if (crp->crp_etype) { - /* Reset the session ID */ - if (sav->tdb_cryptoid != 0) - sav->tdb_cryptoid = crp->crp_sid; - if (crp->crp_etype == EAGAIN) { - IPSECREQUEST_UNLOCK(isr); - return crypto_dispatch(crp); + /* Reset the session ID */ + if (ipsec_updateid(sav, &crp->crp_sid, &cryptoid) != 0) + crypto_freesession(cryptoid); + xd->cryptoid = crp->crp_sid; + return (crypto_dispatch(crp)); } IPCOMPSTAT_INC(ipcomps_noxform); DPRINTF(("%s: crypto error %d\n", __func__, crp->crp_etype)); @@ -537,9 +568,10 @@ ipcomp_output_cb(struct cryptop *crp) mo = m_makespace(m, skip, IPCOMP_HLENGTH, &roff); if (mo == NULL) { IPCOMPSTAT_INC(ipcomps_wrap); - DPRINTF(("%s: IPCOMP header inject failed for IPCA %s/%08lx\n", - __func__, ipsec_address(&sav->sah->saidx.dst), - (u_long) ntohl(sav->spi))); + DPRINTF(("%s: IPCOMP header inject failed " + "for IPCA %s/%08lx\n", + __func__, ipsec_address(&sav->sah->saidx.dst, buf, + sizeof(buf)), (u_long) ntohl(sav->spi))); error = ENOBUFS; goto bad; } @@ -564,7 +596,7 @@ ipcomp_output_cb(struct cryptop *crp) /* Fix Next Protocol in IPv4/IPv6 header */ prot = IPPROTO_IPCOMP; - m_copyback(m, tc->tc_protoff, sizeof(u_int8_t), + m_copyback(m, protoff, sizeof(u_int8_t), (u_char *)&prot); /* Adjust the length in the IP header */ @@ -585,8 +617,8 @@ ipcomp_output_cb(struct cryptop *crp) DPRINTF(("%s: unknown/unsupported protocol " "family %d, IPCA %s/%08lx\n", __func__, sav->sah->saidx.dst.sa.sa_family, - ipsec_address(&sav->sah->saidx.dst), - (u_long) ntohl(sav->spi))); + ipsec_address(&sav->sah->saidx.dst, buf, + sizeof(buf)), (u_long) ntohl(sav->spi))); error = EPFNOSUPPORT; goto bad; } @@ 
-600,47 +632,143 @@ ipcomp_output_cb(struct cryptop *crp) } /* Release the crypto descriptor */ - free(tc, M_XDATA); + free(xd, M_XDATA); crypto_freereq(crp); /* NB: m is reclaimed by ipsec_process_done. */ - error = ipsec_process_done(m, isr); - KEY_FREESAV(&sav); - IPSECREQUEST_UNLOCK(isr); - return error; + error = ipsec_process_done(m, sp, sav, idx); + return (error); bad: - if (sav) - KEY_FREESAV(&sav); - IPSECREQUEST_UNLOCK(isr); if (m) m_freem(m); - free(tc, M_XDATA); + free(xd, M_XDATA); crypto_freereq(crp); - return error; + key_freesav(&sav); + key_freesp(&sp); + return (error); +} + +#ifdef INET +static const struct encaptab *ipe4_cookie = NULL; +extern struct domain inetdomain; +static struct protosw ipcomp4_protosw = { + .pr_type = SOCK_RAW, + .pr_domain = &inetdomain, + .pr_protocol = 0 /* IPPROTO_IPV[46] */, + .pr_flags = PR_ATOMIC | PR_ADDR | PR_LASTHDR, + .pr_input = ipcomp_nonexp_input, + .pr_output = rip_output, + .pr_ctloutput = rip_ctloutput, + .pr_usrreqs = &rip_usrreqs +}; + +static int +ipcomp4_nonexp_encapcheck(const struct mbuf *m, int off, int proto, + void *arg __unused) +{ + union sockaddr_union src, dst; + const struct ip *ip; + + if (V_ipcomp_enable == 0) + return (0); + if (proto != IPPROTO_IPV4 && proto != IPPROTO_IPV6) + return (0); + bzero(&src, sizeof(src)); + bzero(&dst, sizeof(dst)); + src.sa.sa_family = dst.sa.sa_family = AF_INET; + src.sin.sin_len = dst.sin.sin_len = sizeof(struct sockaddr_in); + ip = mtod(m, const struct ip *); + src.sin.sin_addr = ip->ip_src; + dst.sin.sin_addr = ip->ip_dst; + return (ipcomp_encapcheck(&src, &dst)); } +#endif +#ifdef INET6 +static const struct encaptab *ipe6_cookie = NULL; +extern struct domain inet6domain; +static struct protosw ipcomp6_protosw = { + .pr_type = SOCK_RAW, + .pr_domain = &inet6domain, + .pr_protocol = 0 /* IPPROTO_IPV[46] */, + .pr_flags = PR_ATOMIC | PR_ADDR | PR_LASTHDR, + .pr_input = ipcomp_nonexp_input, + .pr_output = rip6_output, + .pr_ctloutput = rip6_ctloutput, + 
.pr_usrreqs = &rip6_usrreqs +}; + +static int +ipcomp6_nonexp_encapcheck(const struct mbuf *m, int off, int proto, + void *arg __unused) +{ + union sockaddr_union src, dst; + const struct ip6_hdr *ip6; + + if (V_ipcomp_enable == 0) + return (0); + if (proto != IPPROTO_IPV4 && proto != IPPROTO_IPV6) + return (0); + bzero(&src, sizeof(src)); + bzero(&dst, sizeof(dst)); + src.sa.sa_family = dst.sa.sa_family = AF_INET6; /* sin6_* fields are filled in below */ + src.sin6.sin6_len = dst.sin6.sin6_len = sizeof(struct sockaddr_in6); + ip6 = mtod(m, const struct ip6_hdr *); + src.sin6.sin6_addr = ip6->ip6_src; + dst.sin6.sin6_addr = ip6->ip6_dst; + if (IN6_IS_SCOPE_LINKLOCAL(&src.sin6.sin6_addr)) { + /* XXX: sa6_recoverscope() */ + src.sin6.sin6_scope_id = + ntohs(src.sin6.sin6_addr.s6_addr16[1]); + src.sin6.sin6_addr.s6_addr16[1] = 0; + } + if (IN6_IS_SCOPE_LINKLOCAL(&dst.sin6.sin6_addr)) { + /* XXX: sa6_recoverscope() */ + dst.sin6.sin6_scope_id = + ntohs(dst.sin6.sin6_addr.s6_addr16[1]); + dst.sin6.sin6_addr.s6_addr16[1] = 0; + } + return (ipcomp_encapcheck(&src, &dst)); +} +#endif static struct xformsw ipcomp_xformsw = { - XF_IPCOMP, XFT_COMP, "IPcomp", - ipcomp_init, ipcomp_zeroize, ipcomp_input, - ipcomp_output + .xf_type = XF_IPCOMP, + .xf_name = "IPcomp", + .xf_init = ipcomp_init, + .xf_zeroize = ipcomp_zeroize, + .xf_input = ipcomp_input, + .xf_output = ipcomp_output, }; static void ipcomp_attach(void) { - xform_register(&ipcomp_xformsw); +#ifdef INET + ipe4_cookie = encap_attach_func(AF_INET, -1, + ipcomp4_nonexp_encapcheck, &ipcomp4_protosw, NULL); +#endif +#ifdef INET6 + ipe6_cookie = encap_attach_func(AF_INET6, -1, + ipcomp6_nonexp_encapcheck, &ipcomp6_protosw, NULL); +#endif + xform_attach(&ipcomp_xformsw); } -SYSINIT(ipcomp_xform_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, ipcomp_attach, NULL); - static void -vnet_ipcomp_attach(const void *unused __unused) +ipcomp_detach(void) { - /* XXX */ - V_ipcompstat.version = IPCOMPSTAT_VERSION; +#ifdef INET + encap_detach(ipe4_cookie); +#endif +#ifdef INET6 + 
encap_detach(ipe6_cookie); +#endif + xform_detach(&ipcomp_xformsw); } -VNET_SYSINIT(vnet_ipcomp_xform_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, - vnet_ipcomp_attach, NULL); +SYSINIT(ipcomp_xform_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, + ipcomp_attach, NULL); +SYSUNINIT(ipcomp_xform_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, + ipcomp_detach, NULL); diff --git a/freebsd/sys/netipsec/xform_ipip.c b/freebsd/sys/netipsec/xform_ipip.c deleted file mode 100644 index b7234be9..00000000 --- a/freebsd/sys/netipsec/xform_ipip.c +++ /dev/null @@ -1,728 +0,0 @@ -#include - -/* $FreeBSD$ */ -/* $OpenBSD: ip_ipip.c,v 1.25 2002/06/10 18:04:55 itojun Exp $ */ -/*- - * The authors of this code are John Ioannidis (ji@tla.org), - * Angelos D. Keromytis (kermit@csd.uch.gr) and - * Niels Provos (provos@physnet.uni-hamburg.de). - * - * The original version of this code was written by John Ioannidis - * for BSD/OS in Athens, Greece, in November 1995. - * - * Ported to OpenBSD and NetBSD, with additional transforms, in December 1996, - * by Angelos D. Keromytis. - * - * Additional transforms and features in 1997 and 1998 by Angelos D. Keromytis - * and Niels Provos. - * - * Additional features in 1999 by Angelos D. Keromytis. - * - * Copyright (C) 1995, 1996, 1997, 1998, 1999 by John Ioannidis, - * Angelos D. Keromytis and Niels Provos. - * Copyright (c) 2001, Angelos D. Keromytis. - * - * Permission to use, copy, and modify this software with or without fee - * is hereby granted, provided that this entire notice is included in - * all copies of any software which is or includes a copy or - * modification of this software. - * You may use this code under the GNU public license if you so wish. Please - * contribute changes back to the authors under this freer than GPL license - * so that we may further the use of strong encryption without limitations to - * all. - * - * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR - * IMPLIED WARRANTY. 
IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY - * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE - * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR - * PURPOSE. - */ - -/* - * IP-inside-IP processing - */ -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#ifdef MROUTING -#include -#endif - -#include -#include - -#include - -#ifdef INET6 -#include -#include -#include -#include -#include -#endif - -#include -#include - -#include - -/* - * We can control the acceptance of IP4 packets by altering the sysctl - * net.inet.ipip.allow value. Zero means drop them, all else is acceptance. - */ -VNET_DEFINE(int, ipip_allow) = 0; -VNET_DEFINE(struct ipipstat, ipipstat); - -SYSCTL_DECL(_net_inet_ipip); -SYSCTL_VNET_INT(_net_inet_ipip, OID_AUTO, - ipip_allow, CTLFLAG_RW, &VNET_NAME(ipip_allow), 0, ""); -SYSCTL_VNET_STRUCT(_net_inet_ipip, IPSECCTL_STATS, - stats, CTLFLAG_RD, &VNET_NAME(ipipstat), ipipstat, ""); - -/* XXX IPCOMP */ -#define M_IPSEC (M_AUTHIPHDR|M_AUTHIPDGM|M_DECRYPTED) - -static void _ipip_input(struct mbuf *m, int iphlen, struct ifnet *gifp); - -#ifdef INET6 -/* - * Really only a wrapper for ipip_input(), for use with IPv6. - */ -int -ip4_input6(struct mbuf **m, int *offp, int proto) -{ -#if 0 - /* If we do not accept IP-in-IP explicitly, drop. */ - if (!V_ipip_allow && ((*m)->m_flags & M_IPSEC) == 0) { - DPRINTF(("%s: dropped due to policy\n", __func__)); - IPIPSTAT_INC(ipips_pdrops); - m_freem(*m); - return IPPROTO_DONE; - } -#endif - _ipip_input(*m, *offp, NULL); - return IPPROTO_DONE; -} -#endif /* INET6 */ - -#ifdef INET -/* - * Really only a wrapper for ipip_input(), for use with IPv4. - */ -void -ip4_input(struct mbuf *m, int off) -{ -#if 0 - /* If we do not accept IP-in-IP explicitly, drop. 
*/ - if (!V_ipip_allow && (m->m_flags & M_IPSEC) == 0) { - DPRINTF(("%s: dropped due to policy\n", __func__)); - IPIPSTAT_INC(ipips_pdrops); - m_freem(m); - return; - } -#endif - _ipip_input(m, off, NULL); -} -#endif /* INET */ - -/* - * ipip_input gets called when we receive an IP{46} encapsulated packet, - * either because we got it at a real interface, or because AH or ESP - * were being used in tunnel mode (in which case the rcvif element will - * contain the address of the encX interface associated with the tunnel. - */ - -static void -_ipip_input(struct mbuf *m, int iphlen, struct ifnet *gifp) -{ -#ifdef INET - register struct sockaddr_in *sin; -#endif - register struct ifnet *ifp; - register struct ifaddr *ifa; - struct ip *ipo; -#ifdef INET6 - register struct sockaddr_in6 *sin6; - struct ip6_hdr *ip6 = NULL; - u_int8_t itos; -#endif - u_int8_t nxt; - int isr; - u_int8_t otos; - u_int8_t v; - int hlen; - - IPIPSTAT_INC(ipips_ipackets); - - m_copydata(m, 0, 1, &v); - - switch (v >> 4) { -#ifdef INET - case 4: - hlen = sizeof(struct ip); - break; -#endif /* INET */ -#ifdef INET6 - case 6: - hlen = sizeof(struct ip6_hdr); - break; -#endif - default: - IPIPSTAT_INC(ipips_family); - m_freem(m); - return /* EAFNOSUPPORT */; - } - - /* Bring the IP header in the first mbuf, if not there already */ - if (m->m_len < hlen) { - if ((m = m_pullup(m, hlen)) == NULL) { - DPRINTF(("%s: m_pullup (1) failed\n", __func__)); - IPIPSTAT_INC(ipips_hdrops); - return; - } - } - - ipo = mtod(m, struct ip *); - -#ifdef MROUTING - if (ipo->ip_v == IPVERSION && ipo->ip_p == IPPROTO_IPV4) { - if (IN_MULTICAST(((struct ip *)((char *) ipo + iphlen))->ip_dst.s_addr)) { - ipip_mroute_input (m, iphlen); - return; - } - } -#endif /* MROUTING */ - - /* Keep outer ecn field. 
*/ - switch (v >> 4) { -#ifdef INET - case 4: - otos = ipo->ip_tos; - break; -#endif /* INET */ -#ifdef INET6 - case 6: - otos = (ntohl(mtod(m, struct ip6_hdr *)->ip6_flow) >> 20) & 0xff; - break; -#endif - default: - panic("ipip_input: unknown ip version %u (outer)", v>>4); - } - - /* Remove outer IP header */ - m_adj(m, iphlen); - - /* Sanity check */ - if (m->m_pkthdr.len < sizeof(struct ip)) { - IPIPSTAT_INC(ipips_hdrops); - m_freem(m); - return; - } - - m_copydata(m, 0, 1, &v); - - switch (v >> 4) { -#ifdef INET - case 4: - hlen = sizeof(struct ip); - break; -#endif /* INET */ - -#ifdef INET6 - case 6: - hlen = sizeof(struct ip6_hdr); - break; -#endif - default: - IPIPSTAT_INC(ipips_family); - m_freem(m); - return; /* EAFNOSUPPORT */ - } - - /* - * Bring the inner IP header in the first mbuf, if not there already. - */ - if (m->m_len < hlen) { - if ((m = m_pullup(m, hlen)) == NULL) { - DPRINTF(("%s: m_pullup (2) failed\n", __func__)); - IPIPSTAT_INC(ipips_hdrops); - return; - } - } - - /* - * RFC 1853 specifies that the inner TTL should not be touched on - * decapsulation. There's no reason this comment should be here, but - * this is as good as any a position. - */ - - /* Some sanity checks in the inner IP header */ - switch (v >> 4) { -#ifdef INET - case 4: - ipo = mtod(m, struct ip *); - nxt = ipo->ip_p; - ip_ecn_egress(V_ip4_ipsec_ecn, &otos, &ipo->ip_tos); - break; -#endif /* INET */ -#ifdef INET6 - case 6: - ip6 = (struct ip6_hdr *) ipo; - nxt = ip6->ip6_nxt; - itos = (ntohl(ip6->ip6_flow) >> 20) & 0xff; - ip_ecn_egress(V_ip6_ipsec_ecn, &otos, &itos); - ip6->ip6_flow &= ~htonl(0xff << 20); - ip6->ip6_flow |= htonl((u_int32_t) itos << 20); - break; -#endif - default: - panic("ipip_input: unknown ip version %u (inner)", v>>4); - } - - /* Check for local address spoofing. 
*/ - if ((m->m_pkthdr.rcvif == NULL || - !(m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK)) && - V_ipip_allow != 2) { - IFNET_RLOCK_NOSLEEP(); - TAILQ_FOREACH(ifp, &V_ifnet, if_link) { - TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { -#ifdef INET - if (ipo) { - if (ifa->ifa_addr->sa_family != - AF_INET) - continue; - - sin = (struct sockaddr_in *) ifa->ifa_addr; - - if (sin->sin_addr.s_addr == - ipo->ip_src.s_addr) { - IPIPSTAT_INC(ipips_spoof); - m_freem(m); - IFNET_RUNLOCK_NOSLEEP(); - return; - } - } -#endif /* INET */ - -#ifdef INET6 - if (ip6) { - if (ifa->ifa_addr->sa_family != - AF_INET6) - continue; - - sin6 = (struct sockaddr_in6 *) ifa->ifa_addr; - - if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, &ip6->ip6_src)) { - IPIPSTAT_INC(ipips_spoof); - m_freem(m); - IFNET_RUNLOCK_NOSLEEP(); - return; - } - - } -#endif /* INET6 */ - } - } - IFNET_RUNLOCK_NOSLEEP(); - } - - /* Statistics */ - IPIPSTAT_ADD(ipips_ibytes, m->m_pkthdr.len - iphlen); - -#ifdef DEV_ENC - switch (v >> 4) { -#ifdef INET - case 4: - ipsec_bpf(m, NULL, AF_INET, ENC_IN|ENC_AFTER); - break; -#endif -#ifdef INET6 - case 6: - ipsec_bpf(m, NULL, AF_INET6, ENC_IN|ENC_AFTER); - break; -#endif - default: - panic("%s: bogus ip version %u", __func__, v>>4); - } - /* pass the mbuf to enc0 for packet filtering */ - if (ipsec_filter(&m, PFIL_IN, ENC_IN|ENC_AFTER) != 0) - return; -#endif - - /* - * Interface pointer stays the same; if no IPsec processing has - * been done (or will be done), this will point to a normal - * interface. Otherwise, it'll point to an enc interface, which - * will allow a packet filter to distinguish between secure and - * untrusted packets. - */ - - switch (v >> 4) { -#ifdef INET - case 4: - isr = NETISR_IP; - break; -#endif -#ifdef INET6 - case 6: - isr = NETISR_IPV6; - break; -#endif - default: - panic("%s: bogus ip version %u", __func__, v>>4); - } - - m_addr_changed(m); - - if (netisr_queue(isr, m)) { /* (0) on success. 
*/ - IPIPSTAT_INC(ipips_qfull); - DPRINTF(("%s: packet dropped because of full queue\n", - __func__)); - } -} - -int -ipip_output( - struct mbuf *m, - struct ipsecrequest *isr, - struct mbuf **mp, - int skip, - int protoff -) -{ - struct secasvar *sav; - u_int8_t tp, otos; - struct secasindex *saidx; - int error; -#if defined(INET) || defined(INET6) - u_int8_t itos; -#endif -#ifdef INET - struct ip *ipo; -#endif /* INET */ -#ifdef INET6 - struct ip6_hdr *ip6, *ip6o; -#endif /* INET6 */ - - sav = isr->sav; - IPSEC_ASSERT(sav != NULL, ("null SA")); - IPSEC_ASSERT(sav->sah != NULL, ("null SAH")); - - /* XXX Deal with empty TDB source/destination addresses. */ - - m_copydata(m, 0, 1, &tp); - tp = (tp >> 4) & 0xff; /* Get the IP version number. */ - - saidx = &sav->sah->saidx; - switch (saidx->dst.sa.sa_family) { -#ifdef INET - case AF_INET: - if (saidx->src.sa.sa_family != AF_INET || - saidx->src.sin.sin_addr.s_addr == INADDR_ANY || - saidx->dst.sin.sin_addr.s_addr == INADDR_ANY) { - DPRINTF(("%s: unspecified tunnel endpoint " - "address in SA %s/%08lx\n", __func__, - ipsec_address(&saidx->dst), - (u_long) ntohl(sav->spi))); - IPIPSTAT_INC(ipips_unspec); - error = EINVAL; - goto bad; - } - - M_PREPEND(m, sizeof(struct ip), M_DONTWAIT); - if (m == 0) { - DPRINTF(("%s: M_PREPEND failed\n", __func__)); - IPIPSTAT_INC(ipips_hdrops); - error = ENOBUFS; - goto bad; - } - - ipo = mtod(m, struct ip *); - - ipo->ip_v = IPVERSION; - ipo->ip_hl = 5; - ipo->ip_len = htons(m->m_pkthdr.len); - ipo->ip_ttl = V_ip_defttl; - ipo->ip_sum = 0; - ipo->ip_src = saidx->src.sin.sin_addr; - ipo->ip_dst = saidx->dst.sin.sin_addr; - - ipo->ip_id = ip_newid(); - - /* If the inner protocol is IP... */ - switch (tp) { - case IPVERSION: - /* Save ECN notification */ - m_copydata(m, sizeof(struct ip) + - offsetof(struct ip, ip_tos), - sizeof(u_int8_t), (caddr_t) &itos); - - ipo->ip_p = IPPROTO_IPIP; - - /* - * We should be keeping tunnel soft-state and - * send back ICMPs if needed. 
- */ - m_copydata(m, sizeof(struct ip) + - offsetof(struct ip, ip_off), - sizeof(u_int16_t), (caddr_t) &ipo->ip_off); - ipo->ip_off = ntohs(ipo->ip_off); - ipo->ip_off &= ~(IP_DF | IP_MF | IP_OFFMASK); - ipo->ip_off = htons(ipo->ip_off); - break; -#ifdef INET6 - case (IPV6_VERSION >> 4): - { - u_int32_t itos32; - - /* Save ECN notification. */ - m_copydata(m, sizeof(struct ip) + - offsetof(struct ip6_hdr, ip6_flow), - sizeof(u_int32_t), (caddr_t) &itos32); - itos = ntohl(itos32) >> 20; - ipo->ip_p = IPPROTO_IPV6; - ipo->ip_off = 0; - break; - } -#endif /* INET6 */ - default: - goto nofamily; - } - - otos = 0; - ip_ecn_ingress(ECN_ALLOWED, &otos, &itos); - ipo->ip_tos = otos; - break; -#endif /* INET */ - -#ifdef INET6 - case AF_INET6: - if (IN6_IS_ADDR_UNSPECIFIED(&saidx->dst.sin6.sin6_addr) || - saidx->src.sa.sa_family != AF_INET6 || - IN6_IS_ADDR_UNSPECIFIED(&saidx->src.sin6.sin6_addr)) { - DPRINTF(("%s: unspecified tunnel endpoint " - "address in SA %s/%08lx\n", __func__, - ipsec_address(&saidx->dst), - (u_long) ntohl(sav->spi))); - IPIPSTAT_INC(ipips_unspec); - error = ENOBUFS; - goto bad; - } - - /* scoped address handling */ - ip6 = mtod(m, struct ip6_hdr *); - if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) - ip6->ip6_src.s6_addr16[1] = 0; - if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst)) - ip6->ip6_dst.s6_addr16[1] = 0; - - M_PREPEND(m, sizeof(struct ip6_hdr), M_DONTWAIT); - if (m == 0) { - DPRINTF(("%s: M_PREPEND failed\n", __func__)); - IPIPSTAT_INC(ipips_hdrops); - error = ENOBUFS; - goto bad; - } - - /* Initialize IPv6 header */ - ip6o = mtod(m, struct ip6_hdr *); - ip6o->ip6_flow = 0; - ip6o->ip6_vfc &= ~IPV6_VERSION_MASK; - ip6o->ip6_vfc |= IPV6_VERSION; - ip6o->ip6_plen = htons(m->m_pkthdr.len); - ip6o->ip6_hlim = V_ip_defttl; - ip6o->ip6_dst = saidx->dst.sin6.sin6_addr; - ip6o->ip6_src = saidx->src.sin6.sin6_addr; - - switch (tp) { -#ifdef INET - case IPVERSION: - /* Save ECN notification */ - m_copydata(m, sizeof(struct ip6_hdr) + - offsetof(struct ip, 
ip_tos), sizeof(u_int8_t), - (caddr_t) &itos); - - /* This is really IPVERSION. */ - ip6o->ip6_nxt = IPPROTO_IPIP; - break; -#endif /* INET */ - case (IPV6_VERSION >> 4): - { - u_int32_t itos32; - - /* Save ECN notification. */ - m_copydata(m, sizeof(struct ip6_hdr) + - offsetof(struct ip6_hdr, ip6_flow), - sizeof(u_int32_t), (caddr_t) &itos32); - itos = ntohl(itos32) >> 20; - - ip6o->ip6_nxt = IPPROTO_IPV6; - break; - } - default: - goto nofamily; - } - - otos = 0; - ip_ecn_ingress(ECN_ALLOWED, &otos, &itos); - ip6o->ip6_flow |= htonl((u_int32_t) otos << 20); - break; -#endif /* INET6 */ - - default: -nofamily: - DPRINTF(("%s: unsupported protocol family %u\n", __func__, - saidx->dst.sa.sa_family)); - IPIPSTAT_INC(ipips_family); - error = EAFNOSUPPORT; /* XXX diffs from openbsd */ - goto bad; - } - - IPIPSTAT_INC(ipips_opackets); - *mp = m; - -#ifdef INET - if (saidx->dst.sa.sa_family == AF_INET) { -#if 0 - if (sav->tdb_xform->xf_type == XF_IP4) - tdb->tdb_cur_bytes += - m->m_pkthdr.len - sizeof(struct ip); -#endif - IPIPSTAT_ADD(ipips_obytes, - m->m_pkthdr.len - sizeof(struct ip)); - } -#endif /* INET */ - -#ifdef INET6 - if (saidx->dst.sa.sa_family == AF_INET6) { -#if 0 - if (sav->tdb_xform->xf_type == XF_IP4) - tdb->tdb_cur_bytes += - m->m_pkthdr.len - sizeof(struct ip6_hdr); -#endif - IPIPSTAT_ADD(ipips_obytes, - m->m_pkthdr.len - sizeof(struct ip6_hdr)); - } -#endif /* INET6 */ - - return 0; -bad: - if (m) - m_freem(m); - *mp = NULL; - return (error); -} - -#ifdef IPSEC -#if defined(INET) || defined(INET6) -static int -ipe4_init(struct secasvar *sav, struct xformsw *xsp) -{ - sav->tdb_xform = xsp; - return 0; -} - -static int -ipe4_zeroize(struct secasvar *sav) -{ - sav->tdb_xform = NULL; - return 0; -} - -static int -ipe4_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff) -{ - /* This is a rather serious mistake, so no conditional printing. 
*/ - printf("%s: should never be called\n", __func__); - if (m) - m_freem(m); - return EOPNOTSUPP; -} - -static struct xformsw ipe4_xformsw = { - XF_IP4, 0, "IPv4 Simple Encapsulation", - ipe4_init, ipe4_zeroize, ipe4_input, ipip_output, -}; - -extern struct domain inetdomain; -#endif /* INET || INET6 */ -#ifdef INET -static struct protosw ipe4_protosw = { - .pr_type = SOCK_RAW, - .pr_domain = &inetdomain, - .pr_protocol = IPPROTO_IPV4, - .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, - .pr_input = ip4_input, - .pr_ctloutput = rip_ctloutput, - .pr_usrreqs = &rip_usrreqs -}; -#endif /* INET */ -#if defined(INET6) && defined(INET) -static struct ip6protosw ipe6_protosw = { - .pr_type = SOCK_RAW, - .pr_domain = &inetdomain, - .pr_protocol = IPPROTO_IPV6, - .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, - .pr_input = ip4_input6, - .pr_ctloutput = rip_ctloutput, - .pr_usrreqs = &rip_usrreqs -}; -#endif /* INET6 && INET */ - -#if defined(INET) -/* - * Check the encapsulated packet to see if we want it - */ -static int -ipe4_encapcheck(const struct mbuf *m, int off, int proto, void *arg) -{ - /* - * Only take packets coming from IPSEC tunnels; the rest - * must be handled by the gif tunnel code. Note that we - * also return a minimum priority when we want the packet - * so any explicit gif tunnels take precedence. - */ - return ((m->m_flags & M_IPSEC) != 0 ? 
1 : 0); -} -#endif /* INET */ - -static void -ipe4_attach(void) -{ - - xform_register(&ipe4_xformsw); - /* attach to encapsulation framework */ - /* XXX save return cookie for detach on module remove */ -#ifdef INET - (void) encap_attach_func(AF_INET, -1, - ipe4_encapcheck, &ipe4_protosw, NULL); -#endif -#if defined(INET6) && defined(INET) - (void) encap_attach_func(AF_INET6, -1, - ipe4_encapcheck, (struct protosw *)&ipe6_protosw, NULL); -#endif -} -SYSINIT(ipe4_xform_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, ipe4_attach, NULL); -#endif /* IPSEC */ diff --git a/freebsd/sys/netipsec/xform_tcp.c b/freebsd/sys/netipsec/xform_tcp.c index 398dca13..81b4f1d0 100644 --- a/freebsd/sys/netipsec/xform_tcp.c +++ b/freebsd/sys/netipsec/xform_tcp.c @@ -1,9 +1,8 @@ #include -/* $FreeBSD$ */ - /*- * Copyright (c) 2003 Bruce M. Simpson + * Copyright (c) 2016 Andrey V. Elsukov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -30,29 +29,37 @@ */ /* TCP MD5 Signature Option (RFC2385) */ +#include +__FBSDID("$FreeBSD$"); + #include #include +#include #include #include #include #include +#include +#include #include +#include #include +#include #include -#include #include +#include #include #include #include #include #include -#include #include #include +#include #include #ifdef INET6 @@ -63,13 +70,256 @@ #include #include +#define TCP_SIGLEN 16 /* length of computed digest in bytes */ +#define TCP_KEYLEN_MIN 1 /* minimum length of TCP-MD5 key */ +#define TCP_KEYLEN_MAX 80 /* maximum length of TCP-MD5 key */ + +static int +tcp_ipsec_pcbctl(struct inpcb *inp, struct sockopt *sopt) +{ + struct tcpcb *tp; + int error, optval; + + INP_WLOCK_ASSERT(inp); + if (sopt->sopt_name != TCP_MD5SIG) { + INP_WUNLOCK(inp); + return (ENOPROTOOPT); + } + + tp = intotcpcb(inp); + if (sopt->sopt_dir == SOPT_GET) { + optval = (tp->t_flags & TF_SIGNATURE) ? 
1 : 0; + INP_WUNLOCK(inp); + + /* On success return with released INP_WLOCK */ + return (sooptcopyout(sopt, &optval, sizeof(optval))); + } + + INP_WUNLOCK(inp); + + error = sooptcopyin(sopt, &optval, sizeof(optval), sizeof(optval)); + if (error != 0) + return (error); + + /* INP_WLOCK_RECHECK */ + INP_WLOCK(inp); + if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { + INP_WUNLOCK(inp); + return (ECONNRESET); + } + if (optval > 0) + tp->t_flags |= TF_SIGNATURE; + else + tp->t_flags &= ~TF_SIGNATURE; + + /* On success return with acquired INP_WLOCK */ + return (error); +} + +/* + * Callback function invoked by m_apply() to digest TCP segment data + * contained within an mbuf chain. + */ +static int +tcp_signature_apply(void *fstate, void *data, u_int len) +{ + + MD5Update(fstate, (u_char *)data, len); + return (0); +} + +#ifdef INET +static int +ip_pseudo_compute(struct mbuf *m, MD5_CTX *ctx) +{ + struct ippseudo ipp; + struct ip *ip; + + ip = mtod(m, struct ip *); + ipp.ippseudo_src.s_addr = ip->ip_src.s_addr; + ipp.ippseudo_dst.s_addr = ip->ip_dst.s_addr; + ipp.ippseudo_p = IPPROTO_TCP; + ipp.ippseudo_pad = 0; + ipp.ippseudo_len = htons(m->m_pkthdr.len - (ip->ip_hl << 2)); + MD5Update(ctx, (char *)&ipp, sizeof(ipp)); + return (ip->ip_hl << 2); +} +#endif + +#ifdef INET6 +static int +ip6_pseudo_compute(struct mbuf *m, MD5_CTX *ctx) +{ + struct ip6_pseudo { + struct in6_addr src, dst; + uint32_t len; + uint32_t nxt; + } ip6p __aligned(4); + struct ip6_hdr *ip6; + + ip6 = mtod(m, struct ip6_hdr *); + ip6p.src = ip6->ip6_src; + ip6p.dst = ip6->ip6_dst; + ip6p.len = htonl(m->m_pkthdr.len - sizeof(*ip6)); /* XXX: ext headers */ + ip6p.nxt = htonl(IPPROTO_TCP); + MD5Update(ctx, (char *)&ip6p, sizeof(ip6p)); + return (sizeof(*ip6)); +} +#endif + +static int +tcp_signature_compute(struct mbuf *m, struct tcphdr *th, + struct secasvar *sav, u_char *buf) +{ + MD5_CTX ctx; + int len; + u_short csum; + + MD5Init(&ctx); + /* Step 1: Update MD5 hash with IP(v6) pseudo-header. 
*/ + switch (sav->sah->saidx.dst.sa.sa_family) { +#ifdef INET + case AF_INET: + len = ip_pseudo_compute(m, &ctx); + break; +#endif +#ifdef INET6 + case AF_INET6: + len = ip6_pseudo_compute(m, &ctx); + break; +#endif + default: + return (EAFNOSUPPORT); + } + /* + * Step 2: Update MD5 hash with TCP header, excluding options. + * The TCP checksum must be set to zero. + */ + csum = th->th_sum; + th->th_sum = 0; + MD5Update(&ctx, (char *)th, sizeof(struct tcphdr)); + th->th_sum = csum; + /* + * Step 3: Update MD5 hash with TCP segment data. + * Use m_apply() to avoid an early m_pullup(). + */ + len += (th->th_off << 2); + if (m->m_pkthdr.len - len > 0) + m_apply(m, len, m->m_pkthdr.len - len, + tcp_signature_apply, &ctx); + /* + * Step 4: Update MD5 hash with shared secret. + */ + MD5Update(&ctx, sav->key_auth->key_data, _KEYLEN(sav->key_auth)); + MD5Final(buf, &ctx); + key_sa_recordxfer(sav, m); + return (0); +} + +static void +setsockaddrs(const struct mbuf *m, union sockaddr_union *src, + union sockaddr_union *dst) +{ + struct ip *ip; + + IPSEC_ASSERT(m->m_len >= sizeof(*ip), ("unexpected mbuf len")); + + ip = mtod(m, struct ip *); + switch (ip->ip_v) { +#ifdef INET + case IPVERSION: + ipsec4_setsockaddrs(m, src, dst); + break; +#endif +#ifdef INET6 + case (IPV6_VERSION >> 4): + ipsec6_setsockaddrs(m, src, dst); + break; +#endif + default: + bzero(src, sizeof(*src)); + bzero(dst, sizeof(*dst)); + } +} + +/* + * Compute TCP-MD5 hash of an *INBOUND* TCP segment. + * Parameters: + * m pointer to head of mbuf chain + * th pointer to TCP header + * buf pointer to storage for computed MD5 digest + * + * Return 0 if successful, otherwise return -1. 
+ */ +static int +tcp_ipsec_input(struct mbuf *m, struct tcphdr *th, u_char *buf) +{ + char tmpdigest[TCP_SIGLEN]; + struct secasindex saidx; + struct secasvar *sav; + + setsockaddrs(m, &saidx.src, &saidx.dst); + saidx.proto = IPPROTO_TCP; + saidx.mode = IPSEC_MODE_TCPMD5; + saidx.reqid = 0; + sav = key_allocsa_tcpmd5(&saidx); + if (sav == NULL) { + KMOD_TCPSTAT_INC(tcps_sig_err_buildsig); + return (EACCES); + } + /* + * tcp_input() operates with TCP header fields in host + * byte order. We expect them in network byte order. + */ + tcp_fields_to_net(th); + tcp_signature_compute(m, th, sav, tmpdigest); + tcp_fields_to_host(th); + key_freesav(&sav); + if (bcmp(buf, tmpdigest, TCP_SIGLEN) != 0) { + KMOD_TCPSTAT_INC(tcps_sig_rcvbadsig); + return (EACCES); + } + KMOD_TCPSTAT_INC(tcps_sig_rcvgoodsig); + return (0); +} + +/* + * Compute TCP-MD5 hash of an *OUTBOUND* TCP segment. + * Parameters: + * m pointer to head of mbuf chain + * th pointer to TCP header + * buf pointer to storage for computed MD5 digest + * + * Return 0 if successful, otherwise return error code. + */ +static int +tcp_ipsec_output(struct mbuf *m, struct tcphdr *th, u_char *buf) +{ + struct secasindex saidx; + struct secasvar *sav; + + setsockaddrs(m, &saidx.src, &saidx.dst); + saidx.proto = IPPROTO_TCP; + saidx.mode = IPSEC_MODE_TCPMD5; + saidx.reqid = 0; + sav = key_allocsa_tcpmd5(&saidx); + if (sav == NULL) { + KMOD_TCPSTAT_INC(tcps_sig_err_buildsig); + return (EACCES); + } + tcp_signature_compute(m, th, sav, buf); + key_freesav(&sav); + return (0); +} + /* * Initialize a TCP-MD5 SA. Called when the SA is being set up. * * We don't need to set up the tdb prefixed fields, as we don't use the * opencrypto code; we just perform a key length check. * - * XXX: Currently we only allow a single 'magic' SPI to be used. + * XXX: Currently we have used single 'magic' SPI and need to still + * support this. 
* * This allows per-host granularity without affecting the userland * interface, which is a simple socket option toggle switch, @@ -88,11 +338,6 @@ tcpsignature_init(struct secasvar *sav, struct xformsw *xsp) { int keylen; - if (sav->spi != htonl(TCP_SIG_SPI)) { - DPRINTF(("%s: SPI must be TCP_SIG_SPI (0x1000)\n", - __func__)); - return (EINVAL); - } if (sav->alg_auth != SADB_X_AALG_TCP_MD5) { DPRINTF(("%s: unsupported authentication algorithm %u\n", __func__, sav->alg_auth)); @@ -107,67 +352,76 @@ tcpsignature_init(struct secasvar *sav, struct xformsw *xsp) DPRINTF(("%s: invalid key length %u\n", __func__, keylen)); return (EINVAL); } - + sav->tdb_xform = xsp; return (0); } /* - * Paranoia. - * * Called when the SA is deleted. */ static int tcpsignature_zeroize(struct secasvar *sav) { - if (sav->key_auth) + if (sav->key_auth != NULL) bzero(sav->key_auth->key_data, _KEYLEN(sav->key_auth)); - - sav->tdb_cryptoid = 0; - sav->tdb_authalgxform = NULL; sav->tdb_xform = NULL; - return (0); } -/* - * Verify that an input packet passes authentication. - * Called from the ipsec layer. - * We do this from within tcp itself, so this routine is just a stub. - */ -static int -tcpsignature_input(struct mbuf *m, struct secasvar *sav, int skip, - int protoff) -{ +static struct xformsw tcpsignature_xformsw = { + .xf_type = XF_TCPSIGNATURE, + .xf_name = "TCP-MD5", + .xf_init = tcpsignature_init, + .xf_zeroize = tcpsignature_zeroize, +}; - return (0); -} +static const struct tcpmd5_methods tcpmd5_methods = { + .input = tcp_ipsec_input, + .output = tcp_ipsec_output, + .pcbctl = tcp_ipsec_pcbctl, +}; + +#ifndef KLD_MODULE +/* TCP-MD5 support is build in the kernel */ +static const struct tcpmd5_support tcpmd5_ipsec = { + .enabled = IPSEC_MODULE_ENABLED, + .methods = &tcpmd5_methods +}; +const struct tcpmd5_support * const tcp_ipsec_support = &tcpmd5_ipsec; +#endif /* !KLD_MODULE */ -/* - * Prepend the authentication header. - * Called from the ipsec layer. 
- * We do this from within tcp itself, so this routine is just a stub. - */ static int -tcpsignature_output(struct mbuf *m, struct ipsecrequest *isr, - struct mbuf **mp, int skip, int protoff) +tcpmd5_modevent(module_t mod, int type, void *data) { - return (EINVAL); + switch (type) { + case MOD_LOAD: + xform_attach(&tcpsignature_xformsw); +#ifdef KLD_MODULE + tcpmd5_support_enable(&tcpmd5_methods); +#endif + break; + case MOD_UNLOAD: +#ifdef KLD_MODULE + tcpmd5_support_disable(); +#endif + xform_detach(&tcpsignature_xformsw); + break; + default: + return (EOPNOTSUPP); + } + return (0); } -static struct xformsw tcpsignature_xformsw = { - XF_TCPSIGNATURE, XFT_AUTH, "TCPMD5", - tcpsignature_init, tcpsignature_zeroize, - tcpsignature_input, tcpsignature_output +static moduledata_t tcpmd5_mod = { + "tcpmd5", + tcpmd5_modevent, + 0 }; -static void -tcpsignature_attach(void) -{ - - xform_register(&tcpsignature_xformsw); -} - -SYSINIT(tcpsignature_xform_init, SI_SUB_DRIVERS, SI_ORDER_FIRST, - tcpsignature_attach, NULL); +DECLARE_MODULE(tcpmd5, tcpmd5_mod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY); +MODULE_VERSION(tcpmd5, 1); +#ifdef KLD_MODULE +MODULE_DEPEND(tcpmd5, ipsec_support, 1, 1, 1); +#endif diff --git a/freebsd/sys/netpfil/ipfw/dn_aqm_pie.h b/freebsd/sys/netpfil/ipfw/dn_aqm_pie.h index aa2fceba..7512d327 100644 --- a/freebsd/sys/netpfil/ipfw/dn_aqm_pie.h +++ b/freebsd/sys/netpfil/ipfw/dn_aqm_pie.h @@ -37,16 +37,16 @@ #define DN_AQM_PIE 2 #define PIE_DQ_THRESHOLD_BITS 14 /* 2^14 =16KB */ -#define PIE_DQ_THRESHOLD (1UL << PIE_DQ_THRESHOLD_BITS) +#define PIE_DQ_THRESHOLD (1L << PIE_DQ_THRESHOLD_BITS) #define MEAN_PKTSIZE 800 /* 31-bits because random() generates range from 0->(2**31)-1 */ #define PIE_PROB_BITS 31 -#define PIE_MAX_PROB ((1ULL<name[0] != '\0') { /* match by name */ if (cmd->name[0] == '\1') /* use tablearg to match */ - return ipfw_lookup_table_extended(chain, cmd->p.kidx, 0, - &ifp->if_index, tablearg); + return ipfw_lookup_table(chain, cmd->p.kidx, 0, + 
&ifp->if_index, tablearg); /* Check name */ if (cmd->p.glob) { if (fnmatch(cmd->name, ifp->if_xname, 0) == 0) @@ -1002,7 +1002,6 @@ ipfw_chk(struct ip_fw_args *args) int is_ipv4 = 0; int done = 0; /* flag to exit the outer loop */ - IPFW_RLOCK_TRACKER; if (m->m_flags & M_SKIP_FIREWALL || (! V_ipfw_vnet_ready)) return (IP_FW_PASS); /* accept */ @@ -1465,96 +1464,142 @@ do { \ src_ip.s_addr); break; - case O_IP_SRC_LOOKUP: case O_IP_DST_LOOKUP: - if (is_ipv4) { - uint32_t key = - (cmd->opcode == O_IP_DST_LOOKUP) ? - dst_ip.s_addr : src_ip.s_addr; - uint32_t v = 0; - - if (cmdlen > F_INSN_SIZE(ipfw_insn_u32)) { - /* generic lookup. The key must be - * in 32bit big-endian format. - */ - v = ((ipfw_insn_u32 *)cmd)->d[1]; - if (v == 0) - key = dst_ip.s_addr; - else if (v == 1) - key = src_ip.s_addr; - else if (v == 6) /* dscp */ - key = (ip->ip_tos >> 2) & 0x3f; - else if (offset != 0) - break; - else if (proto != IPPROTO_TCP && - proto != IPPROTO_UDP) - break; - else if (v == 2) - key = dst_port; - else if (v == 3) - key = src_port; + { + void *pkey; + uint32_t vidx, key; + uint16_t keylen; + + if (cmdlen > F_INSN_SIZE(ipfw_insn_u32)) { + /* Determine lookup key type */ + vidx = ((ipfw_insn_u32 *)cmd)->d[1]; + if (vidx != 4 /* uid */ && + vidx != 5 /* jail */ && + is_ipv6 == 0 && is_ipv4 == 0) + break; + /* Determine key length */ + if (vidx == 0 /* dst-ip */ || + vidx == 1 /* src-ip */) + keylen = is_ipv6 ? + sizeof(struct in6_addr): + sizeof(in_addr_t); + else { + keylen = sizeof(key); + pkey = &key; + } + if (vidx == 0 /* dst-ip */) + pkey = is_ipv4 ? (void *)&dst_ip: + (void *)&args->f_id.dst_ip6; + else if (vidx == 1 /* src-ip */) + pkey = is_ipv4 ? 
(void *)&src_ip: + (void *)&args->f_id.src_ip6; + else if (vidx == 6 /* dscp */) { + if (is_ipv4) + key = ip->ip_tos >> 2; + else { + key = args->f_id.flow_id6; + key = (key & 0x0f) << 2 | + (key & 0xf000) >> 14; + } + key &= 0x3f; + } else if (vidx == 2 /* dst-port */ || + vidx == 3 /* src-port */) { + /* Skip fragments */ + if (offset != 0) + break; + /* Skip proto without ports */ + if (proto != IPPROTO_TCP && + proto != IPPROTO_UDP && + proto != IPPROTO_SCTP) + break; + if (vidx == 2 /* dst-port */) + key = dst_port; + else + key = src_port; + } #ifndef USERSPACE - else if (v == 4 || v == 5) { - check_uidgid( - (ipfw_insn_u32 *)cmd, - args, &ucred_lookup, + else if (vidx == 4 /* uid */ || + vidx == 5 /* jail */) { + check_uidgid( + (ipfw_insn_u32 *)cmd, + args, &ucred_lookup, #ifdef __FreeBSD__ - &ucred_cache); - if (v == 4 /* O_UID */) + &ucred_cache); + if (vidx == 4 /* uid */) #ifndef __rtems__ - key = ucred_cache->cr_uid; + key = ucred_cache->cr_uid; #else /* __rtems__ */ - key = BSD_DEFAULT_UID; + key = BSD_DEFAULT_UID; #endif /* __rtems__ */ - else if (v == 5 /* O_JAIL */) + else if (vidx == 5 /* jail */) #ifndef __rtems__ - key = ucred_cache->cr_prison->pr_id; + key = ucred_cache->cr_prison->pr_id; #else /* __rtems__ */ - key = BSD_DEFAULT_PRISON->pr_id; + key = BSD_DEFAULT_PRISON->pr_id; #endif /* __rtems__ */ #else /* !__FreeBSD__ */ - (void *)&ucred_cache); - if (v ==4 /* O_UID */) - key = ucred_cache.uid; - else if (v == 5 /* O_JAIL */) - key = ucred_cache.xid; + (void *)&ucred_cache); + if (vidx == 4 /* uid */) + key = ucred_cache.uid; + else if (vidx == 5 /* jail */) + key = ucred_cache.xid; #endif /* !__FreeBSD__ */ } #endif /* !USERSPACE */ else - break; - } - match = ipfw_lookup_table(chain, - cmd->arg1, key, &v); - if (!match) + break; + match = ipfw_lookup_table(chain, + cmd->arg1, keylen, pkey, &vidx); + if (!match) + break; + tablearg = vidx; break; - if (cmdlen == F_INSN_SIZE(ipfw_insn_u32)) - match = - ((ipfw_insn_u32 *)cmd)->d[0] == v; - 
else - tablearg = v; + } + /* cmdlen =< F_INSN_SIZE(ipfw_insn_u32) */ + /* FALLTHROUGH */ + } + case O_IP_SRC_LOOKUP: + { + void *pkey; + uint32_t vidx; + uint16_t keylen; + + if (is_ipv4) { + keylen = sizeof(in_addr_t); + if (cmd->opcode == O_IP_DST_LOOKUP) + pkey = &dst_ip; + else + pkey = &src_ip; } else if (is_ipv6) { - uint32_t v = 0; - void *pkey = (cmd->opcode == O_IP_DST_LOOKUP) ? - &args->f_id.dst_ip6: &args->f_id.src_ip6; - match = ipfw_lookup_table_extended(chain, - cmd->arg1, - sizeof(struct in6_addr), - pkey, &v); - if (cmdlen == F_INSN_SIZE(ipfw_insn_u32)) - match = ((ipfw_insn_u32 *)cmd)->d[0] == v; - if (match) - tablearg = v; + keylen = sizeof(struct in6_addr); + if (cmd->opcode == O_IP_DST_LOOKUP) + pkey = &args->f_id.dst_ip6; + else + pkey = &args->f_id.src_ip6; + } else + break; + match = ipfw_lookup_table(chain, cmd->arg1, + keylen, pkey, &vidx); + if (!match) + break; + if (cmdlen == F_INSN_SIZE(ipfw_insn_u32)) { + match = ((ipfw_insn_u32 *)cmd)->d[0] == + TARG_VAL(chain, vidx, tag); + if (!match) + break; } + tablearg = vidx; break; + } case O_IP_FLOW_LOOKUP: { uint32_t v = 0; - match = ipfw_lookup_table_extended(chain, + match = ipfw_lookup_table(chain, cmd->arg1, 0, &args->f_id, &v); if (cmdlen == F_INSN_SIZE(ipfw_insn_u32)) - match = ((ipfw_insn_u32 *)cmd)->d[0] == v; + match = ((ipfw_insn_u32 *)cmd)->d[0] == + TARG_VAL(chain, v, tag); if (match) tablearg = v; } @@ -2583,6 +2628,22 @@ do { \ l = 0; /* in any case exit inner loop */ retval = ipfw_run_eaction(chain, args, cmd, &done); + /* + * If both @retval and @done are zero, + * consider this as rule matching and + * update counters. + */ + if (retval == 0 && done == 0) { + IPFW_INC_RULE_COUNTER(f, pktlen); + /* + * Reset the result of the last + * dynamic state lookup. + * External action can change + * @args content, and it may be + * used for new state lookup later. 
+ */ + dyn_dir = MATCH_UNKNOWN; + } break; default: diff --git a/freebsd/sys/netpfil/ipfw/ip_fw_dynamic.c b/freebsd/sys/netpfil/ipfw/ip_fw_dynamic.c index 5694b1d1..e21cb07e 100644 --- a/freebsd/sys/netpfil/ipfw/ip_fw_dynamic.c +++ b/freebsd/sys/netpfil/ipfw/ip_fw_dynamic.c @@ -56,6 +56,7 @@ __FBSDID("$FreeBSD$"); #include /* for ETHERTYPE_IP */ #include #include +#include #include #include diff --git a/freebsd/sys/netpfil/ipfw/ip_fw_eaction.c b/freebsd/sys/netpfil/ipfw/ip_fw_eaction.c index 2c6ba8b9..817aaca4 100644 --- a/freebsd/sys/netpfil/ipfw/ip_fw_eaction.c +++ b/freebsd/sys/netpfil/ipfw/ip_fw_eaction.c @@ -1,8 +1,8 @@ #include /*- - * Copyright (c) 2016 Yandex LLC - * Copyright (c) 2016 Andrey V. Elsukov + * Copyright (c) 2016-2017 Yandex LLC + * Copyright (c) 2016-2017 Andrey V. Elsukov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -59,7 +59,7 @@ __FBSDID("$FreeBSD$"); * rules. * Module should implement opcode handler with type ipfw_eaction_t. * This handler will be called by ipfw_chk() function when - * O_EXTERNAL_ACTION opcode will be matched. The handler must return + * O_EXTERNAL_ACTION opcode is matched. The handler must return * value used as return value in ipfw_chk(), i.e. IP_FW_PASS, * IP_FW_DENY (see ip_fw_private.h). * Also the last argument must be set by handler. If it is zero, @@ -71,9 +71,12 @@ __FBSDID("$FreeBSD$"); * This function will return eaction_id, that can be used by module. * * It is possible to pass some additional information to external - * action handler via the O_EXTERNAL_INSTANCE opcode. This opcode - * will be next after the O_EXTERNAL_ACTION opcode. cmd->arg1 will - * contain index of named object related to instance of external action. + * action handler using O_EXTERNAL_INSTANCE and O_EXTERNAL_DATA opcodes. + * Such opcodes should be next after the O_EXTERNAL_ACTION opcode. 
+ * For the O_EXTERNAL_INSTANCE opcode the cmd->arg1 contains index of named + * object related to an instance of external action. + * For the O_EXTERNAL_DATA opcode the cmd contains the data that can be used + * by external action handler without needing to create named instance. * * In case when eaction module uses named instances, it should register * opcode rewriting routines for O_EXTERNAL_INSTANCE opcode. The @@ -286,11 +289,13 @@ reset_eaction_obj(struct ip_fw_chain *ch, uint16_t eaction_id) /* * Since named_object related to this instance will be * also destroyed, truncate the chain of opcodes to - * remove O_EXTERNAL_INSTANCE opcode. + * remove the rest of cmd chain just after O_EXTERNAL_ACTION + * opcode. */ if (rule->act_ofs < rule->cmd_len - 1) { - EACTION_DEBUG("truncate rule %d", rule->rulenum); - rule->cmd_len--; + EACTION_DEBUG("truncate rule %d: len %u -> %u", + rule->rulenum, rule->cmd_len, rule->act_ofs + 1); + rule->cmd_len = rule->act_ofs + 1; } } IPFW_WUNLOCK(ch); diff --git a/freebsd/sys/netpfil/ipfw/ip_fw_iface.c b/freebsd/sys/netpfil/ipfw/ip_fw_iface.c index f8973a91..f93c20db 100644 --- a/freebsd/sys/netpfil/ipfw/ip_fw_iface.c +++ b/freebsd/sys/netpfil/ipfw/ip_fw_iface.c @@ -52,6 +52,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include diff --git a/freebsd/sys/netpfil/ipfw/ip_fw_log.c b/freebsd/sys/netpfil/ipfw/ip_fw_log.c index 658e1256..15b611e6 100644 --- a/freebsd/sys/netpfil/ipfw/ip_fw_log.c +++ b/freebsd/sys/netpfil/ipfw/ip_fw_log.c @@ -213,6 +213,7 @@ ipfw_log(struct ip_fw_chain *chain, struct ip_fw *f, u_int hlen, TARG(cmd->arg1, pipe)); break; case O_FORWARD_IP: { + char buf[INET_ADDRSTRLEN]; ipfw_insn_sa *sa = (ipfw_insn_sa *)cmd; int len; struct in_addr dummyaddr; @@ -222,7 +223,7 @@ ipfw_log(struct ip_fw_chain *chain, struct ip_fw *f, u_int hlen, dummyaddr.s_addr = sa->sa.sin_addr.s_addr; len = snprintf(SNPARGS(action2, 0), "Forward to %s", - inet_ntoa(dummyaddr)); + inet_ntoa_r(dummyaddr, buf)); 
if (sa->sa.sin_port) snprintf(SNPARGS(action2, len), ":%d", @@ -265,6 +266,11 @@ ipfw_log(struct ip_fw_chain *chain, struct ip_fw *f, u_int hlen, snprintf(SNPARGS(action2, 0), "Call %d", cmd->arg1); break; + case O_EXTERNAL_ACTION: + snprintf(SNPARGS(action2, 0), "Eaction %s", + ((struct named_object *)SRV_OBJECT(chain, + cmd->arg1))->name); + break; default: action = "UNKNOWN"; break; diff --git a/freebsd/sys/netpfil/ipfw/ip_fw_nat.c b/freebsd/sys/netpfil/ipfw/ip_fw_nat.c index 58bc1f3c..8baa313a 100644 --- a/freebsd/sys/netpfil/ipfw/ip_fw_nat.c +++ b/freebsd/sys/netpfil/ipfw/ip_fw_nat.c @@ -45,6 +45,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include diff --git a/freebsd/sys/netpfil/ipfw/ip_fw_private.h b/freebsd/sys/netpfil/ipfw/ip_fw_private.h index 3b483625..b6471a02 100644 --- a/freebsd/sys/netpfil/ipfw/ip_fw_private.h +++ b/freebsd/sys/netpfil/ipfw/ip_fw_private.h @@ -272,8 +272,6 @@ struct ip_fw_chain { void **srvstate; /* runtime service mappings */ #if defined( __linux__ ) || defined( _WIN32 ) spinlock_t rwmtx; -#else - struct rmlock rwmtx; #endif int static_len; /* total len of static rules (v0) */ uint32_t gencnt; /* NAT generation count */ @@ -414,25 +412,23 @@ struct ipfw_ifc { #define IPFW_PF_RUNLOCK(p) IPFW_RUNLOCK(p) #else /* FreeBSD */ #define IPFW_LOCK_INIT(_chain) do { \ - rm_init(&(_chain)->rwmtx, "IPFW static rules"); \ rw_init(&(_chain)->uh_lock, "IPFW UH lock"); \ } while (0) #define IPFW_LOCK_DESTROY(_chain) do { \ - rm_destroy(&(_chain)->rwmtx); \ rw_destroy(&(_chain)->uh_lock); \ } while (0) -#define IPFW_RLOCK_ASSERT(_chain) rm_assert(&(_chain)->rwmtx, RA_RLOCKED) -#define IPFW_WLOCK_ASSERT(_chain) rm_assert(&(_chain)->rwmtx, RA_WLOCKED) +#define IPFW_RLOCK_ASSERT(_chain) rm_assert(&V_pfil_lock, RA_RLOCKED) +#define IPFW_WLOCK_ASSERT(_chain) rm_assert(&V_pfil_lock, RA_WLOCKED) #define IPFW_RLOCK_TRACKER struct rm_priotracker _tracker -#define IPFW_RLOCK(p) rm_rlock(&(p)->rwmtx, &_tracker) -#define 
IPFW_RUNLOCK(p) rm_runlock(&(p)->rwmtx, &_tracker) -#define IPFW_WLOCK(p) rm_wlock(&(p)->rwmtx) -#define IPFW_WUNLOCK(p) rm_wunlock(&(p)->rwmtx) -#define IPFW_PF_RLOCK(p) IPFW_RLOCK(p) -#define IPFW_PF_RUNLOCK(p) IPFW_RUNLOCK(p) +#define IPFW_RLOCK(p) rm_rlock(&V_pfil_lock, &_tracker) +#define IPFW_RUNLOCK(p) rm_runlock(&V_pfil_lock, &_tracker) +#define IPFW_WLOCK(p) rm_wlock(&V_pfil_lock) +#define IPFW_WUNLOCK(p) rm_wunlock(&V_pfil_lock) +#define IPFW_PF_RLOCK(p) +#define IPFW_PF_RUNLOCK(p) #endif #define IPFW_UH_RLOCK_ASSERT(_chain) rw_assert(&(_chain)->uh_lock, RA_RLOCKED) @@ -741,10 +737,8 @@ struct table_info; typedef int (table_lookup_t)(struct table_info *ti, void *key, uint32_t keylen, uint32_t *val); -int ipfw_lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, - uint32_t *val); -int ipfw_lookup_table_extended(struct ip_fw_chain *ch, uint16_t tbl, - uint16_t plen, void *paddr, uint32_t *val); +int ipfw_lookup_table(struct ip_fw_chain *ch, uint16_t tbl, uint16_t plen, + void *paddr, uint32_t *val); struct named_object *ipfw_objhash_lookup_table_kidx(struct ip_fw_chain *ch, uint16_t kidx); int ipfw_ref_table(struct ip_fw_chain *ch, ipfw_obj_ntlv *ntlv, uint16_t *kidx); diff --git a/freebsd/sys/netpfil/ipfw/ip_fw_sockopt.c b/freebsd/sys/netpfil/ipfw/ip_fw_sockopt.c index 468e4ad4..d9d37758 100644 --- a/freebsd/sys/netpfil/ipfw/ip_fw_sockopt.c +++ b/freebsd/sys/netpfil/ipfw/ip_fw_sockopt.c @@ -60,6 +60,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -1738,11 +1739,16 @@ check_ipfw_rule_body(ipfw_insn *cmd, int cmd_len, struct rule_check_info *ci) return (EINVAL); } ci->object_opcodes++; - /* Do we have O_EXTERNAL_INSTANCE opcode? */ + /* + * Do we have O_EXTERNAL_INSTANCE or O_EXTERNAL_DATA + * opcode? 
+ */ if (l != cmdlen) { l -= cmdlen; cmd += cmdlen; cmdlen = F_LEN(cmd); + if (cmd->opcode == O_EXTERNAL_DATA) + goto check_action; if (cmd->opcode != O_EXTERNAL_INSTANCE) { printf("ipfw: invalid opcode " "next to external action %u\n", @@ -1828,6 +1834,8 @@ check_ipfw_rule_body(ipfw_insn *cmd, int cmd_len, struct rule_check_info *ci) break; case O_IP_SRC_LOOKUP: + if (cmdlen > F_INSN_SIZE(ipfw_insn_u32)) + goto bad_size; case O_IP_DST_LOOKUP: if (cmd->arg1 >= V_fw_tables_max) { printf("ipfw: invalid table number %d\n", @@ -2618,11 +2626,11 @@ unref_rule_objects(struct ip_fw_chain *ch, struct ip_fw *rule) continue; no = rw->find_bykidx(ch, kidx); - KASSERT(no != NULL, ("table id %d not found", kidx)); + KASSERT(no != NULL, ("object id %d not found", kidx)); KASSERT(no->subtype == subtype, - ("wrong type %d (%d) for table id %d", + ("wrong type %d (%d) for object id %d", no->subtype, subtype, kidx)); - KASSERT(no->refcnt > 0, ("refcount for table %d is %d", + KASSERT(no->refcnt > 0, ("refcount for object %d is %d", kidx, no->refcnt)); if (no->refcnt == 1 && rw->destroy_object != NULL) @@ -2671,7 +2679,14 @@ ref_opcode_object(struct ip_fw_chain *ch, ipfw_insn *cmd, struct tid_info *ti, return (0); } - /* Found. Bump refcount and update kidx. */ + /* + * Object already exists. + * Its subtype should match the expected value. + */ + if (ti->type != no->subtype) + return (EINVAL); + + /* Bump refcount and update kidx.
*/ no->refcnt++; rw->update(cmd, no->kidx); return (0); @@ -3137,7 +3152,7 @@ int classify_opcode_kidx(ipfw_insn *cmd, uint16_t *puidx) { - if (find_op_rw(cmd, puidx, NULL) == 0) + if (find_op_rw(cmd, puidx, NULL) == NULL) return (1); return (0); } diff --git a/freebsd/sys/netpfil/ipfw/ip_fw_table.c b/freebsd/sys/netpfil/ipfw/ip_fw_table.c index 17c5f017..48a969da 100644 --- a/freebsd/sys/netpfil/ipfw/ip_fw_table.c +++ b/freebsd/sys/netpfil/ipfw/ip_fw_table.c @@ -55,6 +55,7 @@ __FBSDID("$FreeBSD$"); #include #include #include /* ip_fw.h requires IFNAMSIZ */ +#include #include #include /* struct ipfw_rule_ref */ @@ -407,7 +408,7 @@ prepare_batch_buffer(struct ip_fw_chain *ch, struct table_algo *ta, error = 0; ta_buf_sz = ta->ta_buf_size; if (count == 1) { - /* Sigle add/delete, use on-stack buffer */ + /* Single add/delete, use on-stack buffer */ memset(*ta_buf, 0, TA_BUF_SZ); ta_buf_m = *ta_buf; } else { @@ -1658,23 +1659,6 @@ ipfw_unref_table(struct ip_fw_chain *ch, uint16_t kidx) no->refcnt--; } -/* - * Lookup an IP @addr in table @tbl. - * Stores found value in @val. - * - * Returns 1 if @addr was found. - */ -int -ipfw_lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, - uint32_t *val) -{ - struct table_info *ti; - - ti = KIDX_TO_TI(ch, tbl); - - return (ti->lookup(ti, &addr, sizeof(in_addr_t), val)); -} - /* * Lookup an arbtrary key @paddr of legth @plen in table @tbl. * Stores found value in @val. @@ -1682,7 +1666,7 @@ ipfw_lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, * Returns 1 if key was found. 
*/ int -ipfw_lookup_table_extended(struct ip_fw_chain *ch, uint16_t tbl, uint16_t plen, +ipfw_lookup_table(struct ip_fw_chain *ch, uint16_t tbl, uint16_t plen, void *paddr, uint32_t *val) { struct table_info *ti; diff --git a/freebsd/sys/netpfil/ipfw/ip_fw_table_value.c b/freebsd/sys/netpfil/ipfw/ip_fw_table_value.c index ef42e401..4ef70b8a 100644 --- a/freebsd/sys/netpfil/ipfw/ip_fw_table_value.c +++ b/freebsd/sys/netpfil/ipfw/ip_fw_table_value.c @@ -52,6 +52,7 @@ __FBSDID("$FreeBSD$"); #include #include #include /* ip_fw.h requires IFNAMSIZ */ +#include #include #include /* struct ipfw_rule_ref */ diff --git a/freebsd/sys/netpfil/ipfw/nat64/nat64lsn.c b/freebsd/sys/netpfil/ipfw/nat64/nat64lsn.c index ce666213..ae072a68 100644 --- a/freebsd/sys/netpfil/ipfw/nat64/nat64lsn.c +++ b/freebsd/sys/netpfil/ipfw/nat64/nat64lsn.c @@ -217,7 +217,7 @@ nat64lsn_log(struct pfloghdr *plog, struct mbuf *m, sa_family_t family, uint32_t n, uint32_t sn) { - memset(plog, 0, sizeof(plog)); + memset(plog, 0, sizeof(*plog)); plog->length = PFLOG_REAL_HDRLEN; plog->af = family; plog->action = PF_NAT; diff --git a/freebsd/sys/netpfil/ipfw/nat64/nat64stl.c b/freebsd/sys/netpfil/ipfw/nat64/nat64stl.c index 36e6e268..552267be 100644 --- a/freebsd/sys/netpfil/ipfw/nat64/nat64stl.c +++ b/freebsd/sys/netpfil/ipfw/nat64/nat64stl.c @@ -71,7 +71,7 @@ nat64stl_log(struct pfloghdr *plog, struct mbuf *m, sa_family_t family, { static uint32_t pktid = 0; - memset(plog, 0, sizeof(plog)); + memset(plog, 0, sizeof(*plog)); plog->length = PFLOG_REAL_HDRLEN; plog->af = family; plog->action = PF_NAT; @@ -186,7 +186,7 @@ nat64stl_handle_icmp6(struct ip_fw_chain *chain, struct nat64stl_cfg *cfg, * IPv4 mapped address. 
*/ ip6i = mtodo(m, hlen); - if (ipfw_lookup_table_extended(chain, cfg->map64, + if (ipfw_lookup_table(chain, cfg->map64, sizeof(struct in6_addr), &ip6i->ip6_dst, &tablearg) == 0) { m_freem(m); return (NAT64RETURN); @@ -206,6 +206,7 @@ ipfw_nat64stl(struct ip_fw_chain *chain, struct ip_fw_args *args, { ipfw_insn *icmd; struct nat64stl_cfg *cfg; + in_addr_t dst4; uint32_t tablearg; int ret; @@ -221,11 +222,12 @@ ipfw_nat64stl(struct ip_fw_chain *chain, struct ip_fw_args *args, switch (args->f_id.addr_type) { case 4: - ret = ipfw_lookup_table(chain, cfg->map46, - htonl(args->f_id.dst_ip), &tablearg); + dst4 = htonl(args->f_id.dst_ip); + ret = ipfw_lookup_table(chain, cfg->map46, sizeof(in_addr_t), + &dst4, &tablearg); break; case 6: - ret = ipfw_lookup_table_extended(chain, cfg->map64, + ret = ipfw_lookup_table(chain, cfg->map64, sizeof(struct in6_addr), &args->f_id.src_ip6, &tablearg); break; default: diff --git a/freebsd/sys/netpfil/ipfw/nptv6/nptv6.c b/freebsd/sys/netpfil/ipfw/nptv6/nptv6.c index 4256d028..819ba4fe 100644 --- a/freebsd/sys/netpfil/ipfw/nptv6/nptv6.c +++ b/freebsd/sys/netpfil/ipfw/nptv6/nptv6.c @@ -354,24 +354,24 @@ ipfw_nptv6(struct ip_fw_chain *chain, struct ip_fw_args *args, int ret; *done = 0; /* try next rule if not matched */ + ret = IP_FW_DENY; icmd = cmd + 1; if (cmd->opcode != O_EXTERNAL_ACTION || cmd->arg1 != V_nptv6_eid || icmd->opcode != O_EXTERNAL_INSTANCE || (cfg = NPTV6_LOOKUP(chain, icmd)) == NULL) - return (0); + return (ret); /* * We need act as router, so when forwarding is disabled - * do nothing. */ if (V_ip6_forwarding == 0 || args->f_id.addr_type != 6) - return (0); + return (ret); /* * NOTE: we expect ipfw_chk() did m_pullup() up to upper level * protocol's headers. Also we skip some checks, that ip6_input(), * ip6_forward(), ip6_fastfwd() and ipfw_chk() already did. 
*/ - ret = IP_FW_DENY; ip6 = mtod(args->m, struct ip6_hdr *); NPTV6_IPDEBUG("eid %u, oid %u, %s -> %s %d", cmd->arg1, icmd->arg1, @@ -386,15 +386,15 @@ ipfw_nptv6(struct ip_fw_chain *chain, struct ip_fw_args *args, */ if (IN6_ARE_MASKED_ADDR_EQUAL(&ip6->ip6_dst, &cfg->internal, &cfg->mask)) - return (0); + return (ret); ret = nptv6_rewrite_internal(cfg, &args->m, 0); } else if (IN6_ARE_MASKED_ADDR_EQUAL(&ip6->ip6_dst, &cfg->external, &cfg->mask)) ret = nptv6_rewrite_external(cfg, &args->m, 0); else - return (0); + return (ret); /* - * If address wasn't rewrited - free mbuf. + * If address wasn't rewritten - free mbuf and terminate the search. */ if (ret != 0) { if (args->m != NULL) { @@ -402,14 +402,16 @@ ipfw_nptv6(struct ip_fw_chain *chain, struct ip_fw_args *args, args->m = NULL; /* mark mbuf as consumed */ } NPTV6STAT_INC(cfg, dropped); - } - /* Terminate the search if one_pass is set */ - *done = V_fw_one_pass; - /* Update args->f_id when one_pass is off */ - if (*done == 0 && ret == 0) { - ip6 = mtod(args->m, struct ip6_hdr *); - args->f_id.src_ip6 = ip6->ip6_src; - args->f_id.dst_ip6 = ip6->ip6_dst; + *done = 1; + } else { + /* Terminate the search if one_pass is set */ + *done = V_fw_one_pass; + /* Update args->f_id when one_pass is off */ + if (*done == 0) { + ip6 = mtod(args->m, struct ip6_hdr *); + args->f_id.src_ip6 = ip6->ip6_src; + args->f_id.dst_ip6 = ip6->ip6_dst; + } } return (ret); } diff --git a/freebsd/sys/netpfil/pf/pf.c b/freebsd/sys/netpfil/pf/pf.c index 5b6be3cb..0d7a353e 100644 --- a/freebsd/sys/netpfil/pf/pf.c +++ b/freebsd/sys/netpfil/pf/pf.c @@ -131,6 +131,8 @@ VNET_DEFINE(int, pf_tcp_secret_init); #define V_pf_tcp_secret_init VNET(pf_tcp_secret_init) VNET_DEFINE(int, pf_tcp_iss_off); #define V_pf_tcp_iss_off VNET(pf_tcp_iss_off) +VNET_DECLARE(int, pf_vnet_active); +#define V_pf_vnet_active VNET(pf_vnet_active) /* * Queue for pf_intr() sends.
@@ -302,6 +304,7 @@ static void pf_route6(struct mbuf **, struct pf_rule *, int, int in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len); extern int pf_end_threads; +extern struct proc *pf_purge_proc; VNET_DEFINE(struct pf_limit, pf_limits[PF_LIMIT_MAX]); @@ -1428,42 +1431,51 @@ pf_purge_thread(void *unused __unused) VNET_ITERATOR_DECL(vnet_iter); u_int idx = 0; - for (;;) { - PF_RULES_RLOCK(); - rw_sleep(pf_purge_thread, &pf_rules_lock, 0, "pftm", hz / 10); - PF_RULES_RUNLOCK(); + sx_xlock(&pf_end_lock); + while (pf_end_threads == 0) { + sx_sleep(pf_purge_thread, &pf_end_lock, 0, "pftm", hz / 10); VNET_LIST_RLOCK(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); - if (pf_end_threads) { - pf_end_threads++; - wakeup(pf_purge_thread); - kproc_exit(0); - } - /* Process 1/interval fraction of the state table every run. */ - idx = pf_purge_expired_states(idx, pf_hashmask / + /* Wait until V_pf_default_rule is initialized. */ + if (V_pf_vnet_active == 0) { + CURVNET_RESTORE(); + continue; + } + + /* + * Process 1/interval fraction of the state + * table every run. + */ + idx = pf_purge_expired_states(idx, pf_hashmask / (V_pf_default_rule.timeout[PFTM_INTERVAL] * 10)); - /* Purge other expired types every PFTM_INTERVAL seconds. */ - if (idx == 0) { /* - * Order is important: - * - states and src nodes reference rules - * - states and rules reference kifs + * Purge other expired types every + * PFTM_INTERVAL seconds. 
*/ - pf_purge_expired_fragments(); - pf_purge_expired_src_nodes(); - pf_purge_unlinked_rules(); - pfi_kif_purge(); - } - CURVNET_RESTORE(); + if (idx == 0) { + /* + * Order is important: + * - states and src nodes reference rules + * - states and rules reference kifs + */ + pf_purge_expired_fragments(); + pf_purge_expired_src_nodes(); + pf_purge_unlinked_rules(); + pfi_kif_purge(); + } + CURVNET_RESTORE(); } VNET_LIST_RUNLOCK(); } - /* not reached */ + + pf_end_threads++; + sx_xunlock(&pf_end_lock); + kproc_exit(0); } void @@ -3559,7 +3571,7 @@ pf_create_state(struct pf_rule *r, struct pf_rule *nr, struct pf_rule *a, (counter_u64_fetch(r->states_cur) >= r->max_states)) { counter_u64_add(V_pf_status.lcounters[LCNT_STATES], 1); REASON_SET(&reason, PFRES_MAXSTATES); - return (PF_DROP); + goto csfailed; } /* src node for filter rule */ if ((r->rule_flag & PFRULE_SRCTRACK || @@ -6244,6 +6256,9 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) m->m_pkthdr.rcvif->if_bridge != ifp->if_bridge))) fwdir = PF_FWD; + if (dir == PF_FWD) + dir = PF_OUT; + if (!V_pf_status.running) return (PF_PASS); diff --git a/freebsd/sys/netpfil/pf/pf_ioctl.c b/freebsd/sys/netpfil/pf/pf_ioctl.c index 076ed5f8..3cf3eec8 100644 --- a/freebsd/sys/netpfil/pf/pf_ioctl.c +++ b/freebsd/sys/netpfil/pf/pf_ioctl.c @@ -200,9 +200,11 @@ VNET_DEFINE(int, pf_vnet_active); #define V_pf_vnet_active VNET(pf_vnet_active) int pf_end_threads; +struct proc *pf_purge_proc; struct rwlock pf_rules_lock; struct sx pf_ioctl_lock; +struct sx pf_end_lock; /* pfsync */ pfsync_state_import_t *pfsync_state_import_ptr = NULL; @@ -3742,6 +3744,7 @@ pf_load(void) rw_init(&pf_rules_lock, "pf rulesets"); sx_init(&pf_ioctl_lock, "pf ioctl"); + sx_init(&pf_end_lock, "pf end thread"); pf_mtag_initialize(); @@ -3750,7 +3753,7 @@ pf_load(void) return (ENOMEM); pf_end_threads = 0; - error = kproc_create(pf_purge_thread, NULL, NULL, 0, 0, "pf purge"); + error = kproc_create(pf_purge_thread, NULL, 
&pf_purge_proc, 0, 0, "pf purge"); if (error != 0) return (error); @@ -3778,12 +3781,12 @@ pf_unload_vnet(void) return; } - pf_unload_vnet_purge(); - PF_RULES_WLOCK(); shutdown_pf(); PF_RULES_WUNLOCK(); + pf_unload_vnet_purge(); + pf_normalize_cleanup(); PF_RULES_WLOCK(); pfi_cleanup_vnet(); @@ -3800,11 +3803,13 @@ pf_unload(void) { int error = 0; + sx_xlock(&pf_end_lock); pf_end_threads = 1; while (pf_end_threads < 2) { wakeup_one(pf_purge_thread); - rw_sleep(pf_purge_thread, &pf_rules_lock, 0, "pftmo", 0); + sx_sleep(pf_purge_proc, &pf_end_lock, 0, "pftmo", 0); } + sx_xunlock(&pf_end_lock); if (pf_dev != NULL) destroy_dev(pf_dev); @@ -3813,6 +3818,7 @@ pf_unload(void) rw_destroy(&pf_rules_lock); sx_destroy(&pf_ioctl_lock); + sx_destroy(&pf_end_lock); return (error); } diff --git a/freebsd/sys/netpfil/pf/pf_lb.c b/freebsd/sys/netpfil/pf/pf_lb.c index 033c3879..3fe8dfb6 100644 --- a/freebsd/sys/netpfil/pf/pf_lb.c +++ b/freebsd/sys/netpfil/pf/pf_lb.c @@ -555,7 +555,7 @@ pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction, return (NULL); *nkp = pf_state_key_clone(*skp); if (*nkp == NULL) { - uma_zfree(V_pf_state_key_z, skp); + uma_zfree(V_pf_state_key_z, *skp); *skp = NULL; return (NULL); } diff --git a/freebsd/sys/netpfil/pf/pf_osfp.c b/freebsd/sys/netpfil/pf/pf_osfp.c index 33bef4c8..b6b51636 100644 --- a/freebsd/sys/netpfil/pf/pf_osfp.c +++ b/freebsd/sys/netpfil/pf/pf_osfp.c @@ -21,6 +21,8 @@ #include __FBSDID("$FreeBSD$"); +#include + #include #include #include @@ -36,7 +38,9 @@ __FBSDID("$FreeBSD$"); #include #include +#ifdef INET6 #include +#endif static MALLOC_DEFINE(M_PFOSFP, "pf_osfp", "pf(4) operating system fingerprints"); #define DPFPRINTF(format, x...) 
\ @@ -96,7 +100,11 @@ pf_osfp_fingerprint_hdr(const struct ip *ip, const struct ip6_hdr *ip6, const st struct pf_os_fingerprint fp, *fpresult; int cnt, optlen = 0; const u_int8_t *optp; - char srcname[128]; +#ifdef INET6 + char srcname[INET6_ADDRSTRLEN]; +#else + char srcname[INET_ADDRSTRLEN]; +#endif if ((tcp->th_flags & (TH_SYN|TH_ACK)) != TH_SYN) return (NULL); @@ -112,7 +120,7 @@ pf_osfp_fingerprint_hdr(const struct ip *ip, const struct ip6_hdr *ip6, const st fp.fp_ttl = ip->ip_ttl; if (ip->ip_off & htons(IP_DF)) fp.fp_flags |= PF_OSFP_DF; - strlcpy(srcname, inet_ntoa(ip->ip_src), sizeof(srcname)); + inet_ntoa_r(ip->ip_src, srcname); } #ifdef INET6 else if (ip6) { @@ -121,8 +129,7 @@ pf_osfp_fingerprint_hdr(const struct ip *ip, const struct ip6_hdr *ip6, const st fp.fp_ttl = ip6->ip6_hlim; fp.fp_flags |= PF_OSFP_DF; fp.fp_flags |= PF_OSFP_INET6; - strlcpy(srcname, ip6_sprintf((struct in6_addr *)&ip6->ip6_src), - sizeof(srcname)); + ip6_sprintf(srcname, (const struct in6_addr *)&ip6->ip6_src); } #endif else diff --git a/freebsd/sys/opencrypto/crypto.c b/freebsd/sys/opencrypto/crypto.c index 2d9787bd..f5119aa6 100644 --- a/freebsd/sys/opencrypto/crypto.c +++ b/freebsd/sys/opencrypto/crypto.c @@ -65,6 +65,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -76,6 +77,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include /* XXX for M_XDATA */ @@ -188,6 +190,37 @@ SYSCTL_INT(_debug, OID_AUTO, crypto_timing, CTLFLAG_RW, &crypto_timing, 0, "Enable/disable crypto timing support"); #endif +/* Try to avoid directly exposing the key buffer as a symbol */ +static struct keybuf *keybuf; + +static struct keybuf empty_keybuf = { + .kb_nents = 0 +}; + +/* Obtain the key buffer from boot metadata */ +static void +keybuf_init(void) +{ + caddr_t kmdp; + + kmdp = preload_search_by_type("elf kernel"); + + if (kmdp == NULL) + kmdp = preload_search_by_type("elf64 kernel"); + + keybuf = (struct keybuf 
*)preload_search_info(kmdp, + MODINFO_METADATA | MODINFOMD_KEYBUF); + + if (keybuf == NULL) + keybuf = &empty_keybuf; +} + +/* It'd be nice if we could store these in some kind of secure memory... */ +struct keybuf * get_keybuf(void) { + + return (keybuf); +} + static int crypto_init(void) { @@ -240,6 +273,9 @@ crypto_init(void) error); goto bad; } + + keybuf_init(); + return 0; bad: crypto_destroy(); @@ -289,7 +325,7 @@ crypto_destroy(void) /* XXX flush queues??? */ - /* + /* * Reclaim dynamically allocated resources. */ if (crypto_drivers != NULL) diff --git a/freebsd/sys/powerpc/include/machine/cpufunc.h b/freebsd/sys/powerpc/include/machine/cpufunc.h index 378274b6..c70a94d3 100644 --- a/freebsd/sys/powerpc/include/machine/cpufunc.h +++ b/freebsd/sys/powerpc/include/machine/cpufunc.h @@ -201,7 +201,7 @@ intr_restore(register_t msr) } static __inline struct pcpu * -powerpc_get_pcpup(void) +get_pcpu(void) { struct pcpu *ret; diff --git a/freebsd/sys/powerpc/include/machine/intr_machdep.h b/freebsd/sys/powerpc/include/machine/intr_machdep.h new file mode 100644 index 00000000..5fbf9ee0 --- /dev/null +++ b/freebsd/sys/powerpc/include/machine/intr_machdep.h @@ -0,0 +1,64 @@ +/*- + * Copyright (C) 2002 Benno Rice. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _MACHINE_INTR_MACHDEP_H_ +#define _MACHINE_INTR_MACHDEP_H_ + +#define INTR_VECTORS 256 + +#define MAX_PICS 16 +#define MAP_IRQ(node, pin) powerpc_get_irq(node, pin) + +/* + * Default base address for MSI messages on PowerPC + */ +#define MSI_INTEL_ADDR_BASE 0xfee00000 + +extern device_t root_pic; + +struct trapframe; + +driver_filter_t powerpc_ipi_handler; + +void intrcnt_add(const char *name, u_long **countp); + +void powerpc_register_pic(device_t, uint32_t, u_int, u_int, u_int); +u_int powerpc_get_irq(uint32_t, u_int); + +void powerpc_dispatch_intr(u_int, struct trapframe *); +int powerpc_enable_intr(void); +int powerpc_setup_intr(const char *, u_int, driver_filter_t, driver_intr_t, + void *, enum intr_type, void **); +int powerpc_teardown_intr(void *); +int powerpc_bind_intr(u_int irq, u_char cpu); +int powerpc_config_intr(int, enum intr_trigger, enum intr_polarity); +int powerpc_fw_config_intr(int irq, int sense_code); + +void powerpc_intr_mask(u_int irq); +void powerpc_intr_unmask(u_int irq); + +#endif /* _MACHINE_INTR_MACHDEP_H_ */ diff --git a/freebsd/sys/powerpc/include/machine/psl.h b/freebsd/sys/powerpc/include/machine/psl.h index f0a0fa4c..bb4756f0 100644 --- 
a/freebsd/sys/powerpc/include/machine/psl.h +++ b/freebsd/sys/powerpc/include/machine/psl.h @@ -50,6 +50,10 @@ #define PSL_PMM 0x00000004UL /* performance monitor mark */ /* Machine State Register - Book-E cores */ +#ifdef __powerpc64__ +#define PSL_CM 0x80000000UL /* Computation Mode (64-bit) */ +#endif + #define PSL_UCLE 0x04000000UL /* User mode cache lock enable */ #define PSL_WE 0x00040000UL /* Wait state enable */ #define PSL_CE 0x00020000UL /* Critical interrupt enable */ @@ -86,7 +90,11 @@ #if defined(BOOKE_E500) /* Initial kernel MSR, use IS=1 ad DS=1. */ #define PSL_KERNSET_INIT (PSL_IS | PSL_DS) +#ifdef __powerpc64__ +#define PSL_KERNSET (PSL_CM | PSL_CE | PSL_ME | PSL_EE) +#else #define PSL_KERNSET (PSL_CE | PSL_ME | PSL_EE) +#endif #define PSL_SRR1_MASK 0x00000000UL /* No mask on Book-E */ #elif defined(BOOKE_PPC4XX) #define PSL_KERNSET (PSL_CE | PSL_ME | PSL_EE | PSL_FP) diff --git a/freebsd/sys/powerpc/include/machine/spr.h b/freebsd/sys/powerpc/include/machine/spr.h index 35bd9009..cc5d0b75 100644 --- a/freebsd/sys/powerpc/include/machine/spr.h +++ b/freebsd/sys/powerpc/include/machine/spr.h @@ -192,6 +192,18 @@ #define FSL_E5500 0x8024 #define FSL_E6500 0x8040 +#define SPR_EPCR 0x133 +#define EPCR_EXTGS 0x80000000 +#define EPCR_DTLBGS 0x40000000 +#define EPCR_ITLBGS 0x20000000 +#define EPCR_DSIGS 0x10000000 +#define EPCR_ISIGS 0x08000000 +#define EPCR_DUVGS 0x04000000 +#define EPCR_ICM 0x02000000 +#define EPCR_GICMGS 0x01000000 +#define EPCR_DGTMI 0x00800000 +#define EPCR_DMIUH 0x00400000 +#define EPCR_PMGS 0x00200000 #define SPR_SPEFSCR 0x200 /* ..8 Signal Processing Engine FSCR. */ #define SPR_IBAT0U 0x210 /* .68 Instruction BAT Reg 0 Upper */ #define SPR_IBAT0U 0x210 /* .6. Instruction BAT Reg 0 Upper */ @@ -259,6 +271,7 @@ #define SPR_DBAT6L 0x23d /* .6. Data BAT Reg 6 Lower */ #define SPR_DBAT7U 0x23e /* .6. Data BAT Reg 7 Upper */ #define SPR_DBAT7L 0x23f /* .6. 
Data BAT Reg 7 Lower */ +#define SPR_SPRG8 0x25c /* ..8 SPR General 8 */ #define SPR_MI_CTR 0x310 /* ..8 IMMU control */ #define Mx_CTR_GPM 0x80000000 /* Group Protection Mode */ #define Mx_CTR_PPM 0x40000000 /* Page Protection Mode */ @@ -671,6 +684,8 @@ #define SPR_CSRR1 0x03b /* ..8 59 Critical SRR1 */ #define SPR_MCSRR0 0x23a /* ..8 570 Machine check SRR0 */ #define SPR_MCSRR1 0x23b /* ..8 571 Machine check SRR1 */ +#define SPR_DSRR0 0x23e /* ..8 574 Debug SRR0 */ +#define SPR_DSRR1 0x23f /* ..8 575 Debug SRR1 */ #define SPR_MMUCR 0x3b2 /* 4.. MMU Control Register */ #define MMUCR_SWOA (0x80000000 >> 7) diff --git a/freebsd/sys/security/audit/audit.h b/freebsd/sys/security/audit/audit.h index 4483d1b3..12cda7b5 100644 --- a/freebsd/sys/security/audit/audit.h +++ b/freebsd/sys/security/audit/audit.h @@ -1,7 +1,13 @@ /*- * Copyright (c) 1999-2005 Apple Inc. + * Copyright (c) 2016-2017 Robert N. M. Watson * All rights reserved. * + * This software was developed by BAE Systems, the University of Cambridge + * Computer Laboratory, and Memorial University under DARPA/AFRL contract + * FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent Computing + * (TC) research program. 
+ * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -100,7 +106,9 @@ void audit_arg_auid(uid_t auid); void audit_arg_auditinfo(struct auditinfo *au_info); void audit_arg_auditinfo_addr(struct auditinfo_addr *au_info); void audit_arg_upath1(struct thread *td, int dirfd, char *upath); +void audit_arg_upath1_canon(char *upath); void audit_arg_upath2(struct thread *td, int dirfd, char *upath); +void audit_arg_upath2_canon(char *upath); void audit_arg_vnode1(struct vnode *vp); void audit_arg_vnode2(struct vnode *vp); void audit_arg_text(char *text); @@ -109,6 +117,7 @@ void audit_arg_svipc_cmd(int cmd); void audit_arg_svipc_perm(struct ipc_perm *perm); void audit_arg_svipc_id(int id); void audit_arg_svipc_addr(void *addr); +void audit_arg_svipc_which(int which); void audit_arg_posix_ipc_perm(uid_t uid, gid_t gid, mode_t mode); void audit_arg_auditon(union auditon_udata *udata); void audit_arg_file(struct proc *p, struct file *fp); @@ -232,6 +241,11 @@ void audit_thread_free(struct thread *td); audit_arg_pid((pid)); \ } while (0) +#define AUDIT_ARG_POSIX_IPC_PERM(uid, gid, mode) do { \ + if (AUDITING_TD(curthread)) \ + audit_arg_posix_ipc_perm((uid), (gid), (mode)); \ +} while (0) + #define AUDIT_ARG_PROCESS(p) do { \ if (AUDITING_TD(curthread)) \ audit_arg_process((p)); \ @@ -282,6 +296,31 @@ void audit_thread_free(struct thread *td); audit_arg_suid((suid)); \ } while (0) +#define AUDIT_ARG_SVIPC_CMD(cmd) do { \ + if (AUDITING_TD(curthread)) \ + audit_arg_svipc_cmd((cmd)); \ +} while (0) + +#define AUDIT_ARG_SVIPC_PERM(perm) do { \ + if (AUDITING_TD(curthread)) \ + audit_arg_svipc_perm((perm)); \ +} while (0) + +#define AUDIT_ARG_SVIPC_ID(id) do { \ + if (AUDITING_TD(curthread)) \ + audit_arg_svipc_id((id)); \ +} while (0) + +#define AUDIT_ARG_SVIPC_ADDR(addr) do { \ + if (AUDITING_TD(curthread)) \ + audit_arg_svipc_addr((addr)); \ +} while (0) + +#define
AUDIT_ARG_SVIPC_WHICH(which) do { \ + if (AUDITING_TD(curthread)) \ + audit_arg_svipc_which((which)); \ +} while (0) + #define AUDIT_ARG_TEXT(text) do { \ if (AUDITING_TD(curthread)) \ audit_arg_text((text)); \ @@ -297,11 +336,21 @@ void audit_thread_free(struct thread *td); audit_arg_upath1((td), (dirfd), (upath)); \ } while (0) +#define AUDIT_ARG_UPATH1_CANON(upath) do { \ + if (AUDITING_TD(curthread)) \ + audit_arg_upath1_canon((upath)); \ +} while (0) + #define AUDIT_ARG_UPATH2(td, dirfd, upath) do { \ if (AUDITING_TD(curthread)) \ audit_arg_upath2((td), (dirfd), (upath)); \ } while (0) +#define AUDIT_ARG_UPATH2_CANON(upath) do { \ + if (AUDITING_TD(curthread)) \ + audit_arg_upath2_canon((upath)); \ +} while (0) + #define AUDIT_ARG_VALUE(value) do { \ if (AUDITING_TD(curthread)) \ audit_arg_value((value)); \ @@ -363,6 +412,7 @@ void audit_thread_free(struct thread *td); #define AUDIT_ARG_MODE(mode) #define AUDIT_ARG_OWNER(uid, gid) #define AUDIT_ARG_PID(pid) +#define AUDIT_ARG_POSIX_IPC_PERM(uid, gid, mode) #define AUDIT_ARG_PROCESS(p) #define AUDIT_ARG_RGID(rgid) #define AUDIT_ARG_RIGHTS(rights) @@ -373,10 +423,17 @@ void audit_thread_free(struct thread *td); #define AUDIT_ARG_SOCKET(sodomain, sotype, soprotocol) #define AUDIT_ARG_SOCKADDR(td, dirfd, sa) #define AUDIT_ARG_SUID(suid) +#define AUDIT_ARG_SVIPC_CMD(cmd) +#define AUDIT_ARG_SVIPC_PERM(perm) +#define AUDIT_ARG_SVIPC_ID(id) +#define AUDIT_ARG_SVIPC_ADDR(addr) +#define AUDIT_ARG_SVIPC_WHICH(which) #define AUDIT_ARG_TEXT(text) #define AUDIT_ARG_UID(uid) #define AUDIT_ARG_UPATH1(td, dirfd, upath) +#define AUDIT_ARG_UPATH1_CANON(upath) #define AUDIT_ARG_UPATH2(td, dirfd, upath) +#define AUDIT_ARG_UPATH2_CANON(upath) #define AUDIT_ARG_VALUE(value) #define AUDIT_ARG_VNODE1(vp) #define AUDIT_ARG_VNODE2(vp) diff --git a/freebsd/sys/sparc64/sparc64/in_cksum.c b/freebsd/sys/sparc64/sparc64/in_cksum.c index 845688a5..ab8ab600 100644 --- a/freebsd/sys/sparc64/sparc64/in_cksum.c +++ 
b/freebsd/sys/sparc64/sparc64/in_cksum.c @@ -12,7 +12,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/sys/_callout.h b/freebsd/sys/sys/_callout.h index a9134c8d..650073d3 100644 --- a/freebsd/sys/sys/_callout.h +++ b/freebsd/sys/sys/_callout.h @@ -15,7 +15,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/sys/_sockaddr_storage.h b/freebsd/sys/sys/_sockaddr_storage.h index 5c0048b5..189e16d8 100644 --- a/freebsd/sys/sys/_sockaddr_storage.h +++ b/freebsd/sys/sys/_sockaddr_storage.h @@ -10,7 +10,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* diff --git a/freebsd/sys/sys/ata.h b/freebsd/sys/sys/ata.h index 72104140..9737487d 100644 --- a/freebsd/sys/sys/ata.h +++ b/freebsd/sys/sys/ata.h @@ -682,7 +682,7 @@ struct atapi_sense { #define ATA_IDL_ATA_STRINGS 0x05 /* ATA Strings */ #define ATA_IDL_SECURITY 0x06 /* Security */ #define ATA_IDL_PARALLEL_ATA 0x07 /* Parallel ATA */ -#define ATA_IDL_SERIAL_ATA 0x08 /* Seiral ATA */ +#define ATA_IDL_SERIAL_ATA 0x08 /* Serial ATA */ #define ATA_IDL_ZDI 0x09 /* Zoned Device Information */ struct ata_gp_log_dir { diff --git a/freebsd/sys/sys/bitstring.h b/freebsd/sys/sys/bitstring.h index 32465d11..18c91878 100644 --- a/freebsd/sys/sys/bitstring.h +++ b/freebsd/sys/sys/bitstring.h @@ -13,7 +13,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/sys/buf.h b/freebsd/sys/sys/buf.h index 8688056f..ea6019c1 100644 --- a/freebsd/sys/sys/buf.h +++ b/freebsd/sys/sys/buf.h @@ -15,7 +15,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* diff --git a/freebsd/sys/sys/buf_ring.h b/freebsd/sys/sys/buf_ring.h index 88c28157..86534564 100644 --- a/freebsd/sys/sys/buf_ring.h +++ b/freebsd/sys/sys/buf_ring.h @@ -250,16 +250,16 @@ buf_ring_advance_sc(struct buf_ring *br) /* * Used to return a buffer (most likely already there) - * to the top od the ring. The caller should *not* + * to the top of the ring. The caller should *not* * have used any dequeue to pull it out of the ring * but instead should have used the peek() function. * This is normally used where the transmit queue - * of a driver is full, and an mubf must be returned. + * of a driver is full, and an mbuf must be returned. * Most likely whats in the ring-buffer is what * is being put back (since it was not removed), but * sometimes the lower transmit function may have * done a pullup or other function that will have - * changed it. As an optimzation we always put it + * changed it. As an optimization we always put it * back (since jhb says the store is probably cheaper), * if we have to do a multi-queue version we will need * the compare and an atomic. diff --git a/freebsd/sys/sys/bufobj.h b/freebsd/sys/sys/bufobj.h index 657702c1..2dc440a6 100644 --- a/freebsd/sys/sys/bufobj.h +++ b/freebsd/sys/sys/bufobj.h @@ -88,6 +88,12 @@ struct buf_ops { #define BO_WRITE(bo, bp) ((bo)->bo_ops->bop_write((bp))) #define BO_BDFLUSH(bo, bp) ((bo)->bo_ops->bop_bdflush((bo), (bp))) +/* + * Locking notes: + * 'S' is sync_mtx + * 'v' is the vnode lock which embeds the bufobj. + * '-' Constant and unchanging after initialization. 
+ */ struct bufobj { struct rwlock bo_lock; /* Lock which protects "i" things */ struct buf_ops *bo_ops; /* - Buffer operations */ diff --git a/freebsd/sys/sys/bus.h b/freebsd/sys/sys/bus.h index e62c9bab..61cb5b9c 100644 --- a/freebsd/sys/sys/bus.h +++ b/freebsd/sys/sys/bus.h @@ -265,6 +265,7 @@ enum intr_type { }; enum intr_trigger { + INTR_TRIGGER_INVALID = -1, INTR_TRIGGER_CONFORM = 0, INTR_TRIGGER_EDGE = 1, INTR_TRIGGER_LEVEL = 2 @@ -392,14 +393,14 @@ int resource_list_print_type(struct resource_list *rl, const char *format); /* - * The root bus, to which all top-level busses are attached. + * The root bus, to which all top-level buses are attached. */ extern device_t root_bus; extern devclass_t root_devclass; void root_bus_configure(void); /* - * Useful functions for implementing busses. + * Useful functions for implementing buses. */ int bus_generic_activate_resource(device_t dev, device_t child, int type, @@ -662,7 +663,7 @@ void bus_data_generation_update(void); * Some convenience defines for probe routines to return. These are just * suggested values, and there's nothing magical about them. * BUS_PROBE_SPECIFIC is for devices that cannot be reprobed, and that no - * possible other driver may exist (typically legacy drivers who don't fallow + * possible other driver may exist (typically legacy drivers who don't follow * all the rules, or special needs drivers). BUS_PROBE_VENDOR is the * suggested value that vendor supplied drivers use. This is for source or * binary drivers that are not yet integrated into the FreeBSD tree. Its use @@ -675,7 +676,7 @@ void bus_data_generation_update(void); * supports the newer ones would return BUS_PROBE_DEFAULT. BUS_PROBE_GENERIC * is for drivers that wish to have a generic form and a specialized form, * like is done with the pci bus and the acpi pci bus. 
BUS_PROBE_HOOVER is - * for those busses that implement a generic device place-holder for devices on + * for those buses that implement a generic device placeholder for devices on * the bus that have no more specific driver for them (aka ugen). * BUS_PROBE_NOWILDCARD or lower means that the device isn't really bidding * for a device node, but accepts only devices that its parent has told it @@ -699,7 +700,7 @@ void bus_data_generation_update(void); * probed in earlier passes. */ #define BUS_PASS_ROOT 0 /* Used to attach root0. */ -#define BUS_PASS_BUS 10 /* Busses and bridges. */ +#define BUS_PASS_BUS 10 /* Buses and bridges. */ #define BUS_PASS_CPU 20 /* CPU devices. */ #define BUS_PASS_RESOURCE 30 /* Resource discovery. */ #define BUS_PASS_INTERRUPT 40 /* Interrupt controllers. */ @@ -734,7 +735,7 @@ struct module; int driver_module_handler(struct module *, int, void *); /** - * Module support for automatically adding drivers to busses. + * Module support for automatically adding drivers to buses. */ struct driver_module_data { int (*dmd_chainevh)(struct module *, int, void *); diff --git a/freebsd/sys/sys/callout.h b/freebsd/sys/sys/callout.h index f58fa587..b219052f 100644 --- a/freebsd/sys/sys/callout.h +++ b/freebsd/sys/sys/callout.h @@ -15,7 +15,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* diff --git a/freebsd/sys/sys/capability.h b/freebsd/sys/sys/capability.h index 8b1c229f..3bdaf203 100644 --- a/freebsd/sys/sys/capability.h +++ b/freebsd/sys/sys/capability.h @@ -38,6 +38,7 @@ #ifndef _SYS_CAPABILITY_H_ #define _SYS_CAPABILITY_H_ +#warning this file includes <sys/capability.h> which is deprecated #include <sys/capsicum.h> #endif /* !_SYS_CAPABILITY_H_ */ diff --git a/freebsd/sys/sys/conf.h b/freebsd/sys/sys/conf.h index 084cab22..8d39c629 100644 --- a/freebsd/sys/sys/conf.h +++ b/freebsd/sys/sys/conf.h @@ -17,7 +17,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/sys/ctype.h b/freebsd/sys/sys/ctype.h index 5022d40c..57b848b5 100644 --- a/freebsd/sys/sys/ctype.h +++ b/freebsd/sys/sys/ctype.h @@ -15,7 +15,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/sys/domain.h b/freebsd/sys/sys/domain.h index 1817e788..64da7ed7 100644 --- a/freebsd/sys/sys/domain.h +++ b/freebsd/sys/sys/domain.h @@ -10,7 +10,7 @@ * 2.
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/sys/eventvar.h b/freebsd/sys/sys/eventvar.h index c7e46230..335a6191 100644 --- a/freebsd/sys/sys/eventvar.h +++ b/freebsd/sys/sys/eventvar.h @@ -30,7 +30,7 @@ #define _SYS_EVENTVAR_H_ #ifndef _KERNEL -#error "no user-servicable parts inside" +#error "no user-serviceable parts inside" #endif #include diff --git a/freebsd/sys/sys/file.h b/freebsd/sys/sys/file.h index 4fcbbde0..092362b4 100644 --- a/freebsd/sys/sys/file.h +++ b/freebsd/sys/sys/file.h @@ -10,7 +10,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* @@ -50,11 +50,10 @@ struct thread; struct uio; struct knote; struct vnode; -struct socket; - #endif /* _KERNEL */ +#define DTYPE_NONE 0 /* not yet initialized */ #define DTYPE_VNODE 1 /* file */ #define DTYPE_SOCKET 2 /* communications endpoint */ #define DTYPE_PIPE 3 /* pipe */ @@ -68,6 +67,7 @@ struct socket; #define DTYPE_DEV 11 /* Device specific fd type */ #define DTYPE_PROCDESC 12 /* process descriptor */ #define DTYPE_LINUXEFD 13 /* emulation eventfd type */ +#define DTYPE_LINUXTFD 14 /* emulation timerfd type */ #ifdef _KERNEL @@ -392,10 +392,6 @@ int fgetvp_read(struct thread *td, int fd, cap_rights_t *rightsp, int fgetvp_write(struct thread *td, int fd, cap_rights_t *rightsp, struct vnode **vpp); -int fgetsock(struct thread *td, int fd, cap_rights_t *rightsp, - struct socket **spp, u_int *fflagp); -void fputsock(struct socket *sp); - static __inline int _fnoop(void) { diff --git a/freebsd/sys/sys/filedesc.h b/freebsd/sys/sys/filedesc.h index 1e99bfae..6b4dd1ee 100644 --- a/freebsd/sys/sys/filedesc.h +++ b/freebsd/sys/sys/filedesc.h @@ -10,7 +10,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/sys/gtaskqueue.h b/freebsd/sys/sys/gtaskqueue.h new file mode 100644 index 00000000..c9b34e4f --- /dev/null +++ b/freebsd/sys/sys/gtaskqueue.h @@ -0,0 +1,124 @@ +/*- + * Copyright (c) 2014 Jeffrey Roberson + * Copyright (c) 2016 Matthew Macy + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _SYS_GTASKQUEUE_H_ +#define _SYS_GTASKQUEUE_H_ +#include <sys/taskqueue.h> + +#ifndef _KERNEL +#error "no user-serviceable parts inside" +#endif + +struct gtaskqueue; +typedef void (*gtaskqueue_enqueue_fn)(void *context); + +/* + * Taskqueue groups. Manages dynamic thread groups and irq binding for + * device and other tasks.
+ */ + +void gtaskqueue_block(struct gtaskqueue *queue); +void gtaskqueue_unblock(struct gtaskqueue *queue); + +int gtaskqueue_cancel(struct gtaskqueue *queue, struct gtask *gtask); +void gtaskqueue_drain(struct gtaskqueue *queue, struct gtask *task); +void gtaskqueue_drain_all(struct gtaskqueue *queue); + +int grouptaskqueue_enqueue(struct gtaskqueue *queue, struct gtask *task); +void taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *grptask, + void *uniq, int irq, char *name); +int taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *grptask, + void *uniq, int cpu, int irq, char *name); +void taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask); +struct taskqgroup *taskqgroup_create(char *name); +void taskqgroup_destroy(struct taskqgroup *qgroup); +int taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride); + +#define TASK_ENQUEUED 0x1 +#define TASK_SKIP_WAKEUP 0x2 + + +#define GTASK_INIT(task, flags, priority, func, context) do { \ + (task)->ta_flags = flags; \ + (task)->ta_priority = (priority); \ + (task)->ta_func = (func); \ + (task)->ta_context = (context); \ +} while (0) + +#define GROUPTASK_INIT(gtask, priority, func, context) \ + GTASK_INIT(&(gtask)->gt_task, TASK_SKIP_WAKEUP, priority, func, context) + +#define GROUPTASK_ENQUEUE(gtask) \ + grouptaskqueue_enqueue((gtask)->gt_taskqueue, &(gtask)->gt_task) + +#define TASKQGROUP_DECLARE(name) \ +extern struct taskqgroup *qgroup_##name + +#ifdef EARLY_AP_STARTUP +#define TASKQGROUP_DEFINE(name, cnt, stride) \ + \ +struct taskqgroup *qgroup_##name; \ + \ +static void \ +taskqgroup_define_##name(void *arg) \ +{ \ + qgroup_##name = taskqgroup_create(#name); \ + taskqgroup_adjust(qgroup_##name, (cnt), (stride)); \ +} \ + \ +SYSINIT(taskqgroup_##name, SI_SUB_INIT_IF, SI_ORDER_FIRST, \ + taskqgroup_define_##name, NULL) +#else /* !EARLY_AP_STARTUP */ +#define TASKQGROUP_DEFINE(name, cnt, stride) \ + \ +struct taskqgroup *qgroup_##name; \ + \ +static void \ 
+taskqgroup_define_##name(void *arg) \ +{ \ + qgroup_##name = taskqgroup_create(#name); \ +} \ + \ +SYSINIT(taskqgroup_##name, SI_SUB_INIT_IF, SI_ORDER_FIRST, \ + taskqgroup_define_##name, NULL); \ + \ +static void \ +taskqgroup_adjust_##name(void *arg) \ +{ \ + taskqgroup_adjust(qgroup_##name, (cnt), (stride)); \ +} \ + \ +SYSINIT(taskqgroup_adj_##name, SI_SUB_SMP, SI_ORDER_ANY, \ + taskqgroup_adjust_##name, NULL) +#endif /* EARLY_AP_STARTUP */ + +TASKQGROUP_DECLARE(net); +TASKQGROUP_DECLARE(softirq); + +#endif /* !_SYS_GTASKQUEUE_H_ */ diff --git a/freebsd/sys/sys/kernel.h b/freebsd/sys/sys/kernel.h index 8f8f4ea7..3ca8af6e 100644 --- a/freebsd/sys/sys/kernel.h +++ b/freebsd/sys/sys/kernel.h @@ -394,7 +394,7 @@ struct tunable_uint64 { uint64_t *var; }; #define TUNABLE_UINT64(path, var) \ - static struct tunable_ulong __CONCAT(__tunable_uint64_, __LINE__) = { \ + static struct tunable_uint64 __CONCAT(__tunable_uint64_, __LINE__) = { \ (path), \ (var), \ }; \ diff --git a/freebsd/sys/sys/libkern.h b/freebsd/sys/sys/libkern.h index c8fcd877..5986a740 100644 --- a/freebsd/sys/sys/libkern.h +++ b/freebsd/sys/sys/libkern.h @@ -10,7 +10,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* @@ -53,9 +53,36 @@ extern u_char const bcd2bin_data[]; extern u_char const bin2bcd_data[]; extern char const hex2ascii_data[]; -#define bcd2bin(bcd) (bcd2bin_data[bcd]) -#define bin2bcd(bin) (bin2bcd_data[bin]) -#define hex2ascii(hex) (hex2ascii_data[hex]) +#define LIBKERN_LEN_BCD2BIN 154 +#define LIBKERN_LEN_BIN2BCD 100 +#define LIBKERN_LEN_HEX2ASCII 36 + +static inline u_char +bcd2bin(int bcd) +{ + + KASSERT(bcd >= 0 && bcd < LIBKERN_LEN_BCD2BIN, + ("invalid bcd %d", bcd)); + return (bcd2bin_data[bcd]); +} + +static inline u_char +bin2bcd(int bin) +{ + + KASSERT(bin >= 0 && bin < LIBKERN_LEN_BIN2BCD, + ("invalid bin %d", bin)); + return (bin2bcd_data[bin]); +} + +static inline char +hex2ascii(int hex) +{ + + KASSERT(hex >= 0 && hex < LIBKERN_LEN_HEX2ASCII, + ("invalid hex %d", hex)); + return (hex2ascii_data[hex]); +} static __inline int imax(int a, int b) { return (a > b ? a : b); } static __inline int imin(int a, int b) { return (a < b ? a : b); } @@ -231,6 +258,11 @@ crc32(const void *buf, size_t size) uint32_t calculate_crc32c(uint32_t crc32c, const unsigned char *buffer, unsigned int length); +#ifdef _KERNEL +#if defined(__amd64__) || defined(__i386__) +uint32_t sse42_crc32c(uint32_t, const unsigned char *, unsigned); +#endif +#endif LIBKERN_INLINE void *memset(void *, int, size_t); diff --git a/freebsd/sys/sys/limits.h b/freebsd/sys/sys/limits.h index edb21d8d..fdf98ba3 100644 --- a/freebsd/sys/sys/limits.h +++ b/freebsd/sys/sys/limits.h @@ -10,7 +10,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* diff --git a/freebsd/sys/sys/linker.h b/freebsd/sys/sys/linker.h index eecb4cdd..330af02e 100644 --- a/freebsd/sys/sys/linker.h +++ b/freebsd/sys/sys/linker.h @@ -143,7 +143,7 @@ int linker_file_foreach(linker_predicate_t *_predicate, void *_context); * Lookup a symbol in a file. If deps is TRUE, look in dependencies * if not found in file. */ -caddr_t linker_file_lookup_symbol(linker_file_t _file, const char* _name, +caddr_t linker_file_lookup_symbol(linker_file_t _file, const char* _name, int _deps); /* @@ -157,7 +157,7 @@ int linker_file_lookup_set(linker_file_t _file, const char *_name, /* * List all functions in a file. */ -int linker_file_function_listall(linker_file_t, +int linker_file_function_listall(linker_file_t, linker_function_nameval_callback_t, void *); /* @@ -217,6 +217,7 @@ void *linker_hwpmc_list_objects(void); #define MODINFOMD_CTORS_ADDR 0x000a /* address of .ctors */ #define MODINFOMD_CTORS_SIZE 0x000b /* size of .ctors */ #define MODINFOMD_FW_HANDLE 0x000c /* Firmware dependent handle */ +#define MODINFOMD_KEYBUF 0x000d /* Crypto key intake buffer */ #define MODINFOMD_NOCOPY 0x8000 /* don't copy this metadata to the kernel */ #define MODINFOMD_DEPLIST (0x4001 | MODINFOMD_NOCOPY) /* depends on */ diff --git a/freebsd/sys/sys/lockmgr.h b/freebsd/sys/sys/lockmgr.h index 3019e4c4..60749228 100644 --- a/freebsd/sys/sys/lockmgr.h +++ b/freebsd/sys/sys/lockmgr.h @@ -68,6 +68,10 @@ struct thread; */ int __lockmgr_args(struct lock *lk, u_int flags, struct lock_object *ilk, const char *wmesg, int prio, int timo, const char *file, int line); +int lockmgr_lock_fast_path(struct lock *lk, u_int flags, + struct lock_object *ilk, const char *file, int line); +int lockmgr_unlock_fast_path(struct lock *lk, u_int flags, + struct lock_object *ilk); #if defined(INVARIANTS) || defined(INVARIANT_SUPPORT) void _lockmgr_assert(const struct lock *lk, int what, const char *file, int line); #endif diff --git a/freebsd/sys/sys/lockstat.h b/freebsd/sys/sys/lockstat.h 
index 1fc79ffe..705f3bb5 100644 --- a/freebsd/sys/sys/lockstat.h +++ b/freebsd/sys/sys/lockstat.h @@ -68,7 +68,7 @@ SDT_PROBE_DECLARE(lockstat, , , thread__spin); #define LOCKSTAT_WRITER 0 #define LOCKSTAT_READER 1 -extern int lockstat_enabled; +extern volatile int lockstat_enabled; #ifdef KDTRACE_HOOKS @@ -107,6 +107,13 @@ extern int lockstat_enabled; LOCKSTAT_RECORD1(probe, lp, a); \ } while (0) +#ifndef LOCK_PROFILING +#define LOCKSTAT_PROFILE_ENABLED(probe) __predict_false(lockstat_enabled) +#define LOCKSTAT_OOL_PROFILE_ENABLED(probe) LOCKSTAT_PROFILE_ENABLED(probe) +#else +#define LOCKSTAT_OOL_PROFILE_ENABLED(probe) 1 +#endif + struct lock_object; uint64_t lockstat_nsecs(struct lock_object *); @@ -130,6 +137,12 @@ uint64_t lockstat_nsecs(struct lock_object *); #define LOCKSTAT_PROFILE_RELEASE_RWLOCK(probe, lp, a) \ LOCKSTAT_PROFILE_RELEASE_LOCK(probe, lp) +#ifndef LOCK_PROFILING +#define LOCKSTAT_PROFILE_ENABLED(probe) 0 +#endif +#define LOCKSTAT_OOL_PROFILE_ENABLED(probe) 1 + #endif /* !KDTRACE_HOOKS */ + #endif /* _KERNEL */ #endif /* _SYS_LOCKSTAT_H */ diff --git a/freebsd/sys/sys/malloc.h b/freebsd/sys/sys/malloc.h index 310d2551..c59456de 100644 --- a/freebsd/sys/sys/malloc.h +++ b/freebsd/sys/sys/malloc.h @@ -12,7 +12,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* diff --git a/freebsd/sys/sys/mbuf.h b/freebsd/sys/sys/mbuf.h index 9ce52a17..4631a535 100644 --- a/freebsd/sys/sys/mbuf.h +++ b/freebsd/sys/sys/mbuf.h @@ -129,6 +129,14 @@ struct m_tag { void (*m_tag_free)(struct m_tag *); }; +/* + * Static network interface owned tag. + * Allocated through ifp->if_snd_tag_alloc(). + */ +struct m_snd_tag { + struct ifnet *ifp; /* network interface tag belongs to */ +}; + /* * Record/packet header in first mbuf of chain; valid only if M_PKTHDR is set. * Size ILP32: 48 @@ -137,7 +145,10 @@ struct m_tag { * they are correct. */ struct pkthdr { - struct ifnet *rcvif; /* rcv interface */ + union { + struct m_snd_tag *snd_tag; /* send tag, if any */ + struct ifnet *rcvif; /* rcv interface */ + }; SLIST_HEAD(packet_tags, m_tag) tags; /* list of packet tags */ int32_t len; /* total packet length */ @@ -607,7 +618,7 @@ struct mbuf *m_getjcl(int, short, int, int); struct mbuf *m_getm2(struct mbuf *, int, int, short, int); struct mbuf *m_getptr(struct mbuf *, int, int *); u_int m_length(struct mbuf *, struct mbuf **); -int m_mbuftouio(struct uio *, struct mbuf *, int); +int m_mbuftouio(struct uio *, const struct mbuf *, int); void m_move_pkthdr(struct mbuf *, struct mbuf *); int m_pkthdr_init(struct mbuf *, int); struct mbuf *m_prepend(struct mbuf *, int, int); @@ -1314,5 +1325,18 @@ mbufq_prepend(struct mbufq *mq, struct mbuf *m) STAILQ_INSERT_HEAD(&mq->mq_head, m, m_stailqpkt); mq->mq_len++; } + +/* + * Note: this doesn't enforce the maximum list size for dst. + */ +static inline void +mbufq_concat(struct mbufq *mq_dst, struct mbufq *mq_src) +{ + + mq_dst->mq_len += mq_src->mq_len; + STAILQ_CONCAT(&mq_dst->mq_head, &mq_src->mq_head); + mq_src->mq_len = 0; +} + #endif /* _KERNEL */ #endif /* !_SYS_MBUF_H_ */ diff --git a/freebsd/sys/sys/mount.h b/freebsd/sys/sys/mount.h index acc9b81b..e132159d 100644 --- a/freebsd/sys/sys/mount.h +++ b/freebsd/sys/sys/mount.h @@ -10,7 +10,7 @@ * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/sys/mutex.h b/freebsd/sys/sys/mutex.h index 3cec5dbd..3b5d3834 100644 --- a/freebsd/sys/sys/mutex.h +++ b/freebsd/sys/sys/mutex.h @@ -104,13 +104,19 @@ void mtx_sysinit(void *arg); int _mtx_trylock_flags_(volatile uintptr_t *c, int opts, const char *file, int line); void mutex_init(void); -void __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t tid, int opts, - const char *file, int line); +#if LOCK_DEBUG > 0 +void __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t v, uintptr_t tid, + int opts, const char *file, int line); void __mtx_unlock_sleep(volatile uintptr_t *c, int opts, const char *file, int line); +#else +void __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t v, uintptr_t tid); +void __mtx_unlock_sleep(volatile uintptr_t *c); +#endif + #ifdef SMP -void _mtx_lock_spin_cookie(volatile uintptr_t *c, uintptr_t tid, int opts, - const char *file, int line); +void _mtx_lock_spin_cookie(volatile uintptr_t *c, uintptr_t v, uintptr_t tid, + int opts, const char *file, int line); #endif void __mtx_lock_flags(volatile uintptr_t *c, int opts, const char *file, int line); @@ -167,13 +173,20 @@ void thread_lock_flags_(struct thread *, int, const char *, int); _mtx_destroy(&(m)->mtx_lock) #define mtx_trylock_flags_(m, o, f, l) \ _mtx_trylock_flags_(&(m)->mtx_lock, o, f, l) -#define _mtx_lock_sleep(m, t, o, f, l) \ - __mtx_lock_sleep(&(m)->mtx_lock, t, o, f, l) +#if LOCK_DEBUG > 0 +#define _mtx_lock_sleep(m, v, t, o, f, l) \ + __mtx_lock_sleep(&(m)->mtx_lock, v, t, o, f, l) 
#define _mtx_unlock_sleep(m, o, f, l) \ __mtx_unlock_sleep(&(m)->mtx_lock, o, f, l) +#else +#define _mtx_lock_sleep(m, v, t, o, f, l) \ + __mtx_lock_sleep(&(m)->mtx_lock, v, t) +#define _mtx_unlock_sleep(m, o, f, l) \ + __mtx_unlock_sleep(&(m)->mtx_lock) +#endif #ifdef SMP -#define _mtx_lock_spin(m, t, o, f, l) \ - _mtx_lock_spin_cookie(&(m)->mtx_lock, t, o, f, l) +#define _mtx_lock_spin(m, v, t, o, f, l) \ + _mtx_lock_spin_cookie(&(m)->mtx_lock, v, t, o, f, l) #endif #define _mtx_lock_flags(m, o, f, l) \ __mtx_lock_flags(&(m)->mtx_lock, o, f, l) @@ -199,6 +212,9 @@ void thread_lock_flags_(struct thread *, int, const char *, int); #define _mtx_obtain_lock(mp, tid) \ atomic_cmpset_acq_ptr(&(mp)->mtx_lock, MTX_UNOWNED, (tid)) +#define _mtx_obtain_lock_fetch(mp, vp, tid) \ + atomic_fcmpset_acq_ptr(&(mp)->mtx_lock, vp, (tid)) + /* Try to release mtx_lock if it is unrecursed and uncontested. */ #define _mtx_release_lock(mp, tid) \ atomic_cmpset_rel_ptr(&(mp)->mtx_lock, (tid), MTX_UNOWNED) @@ -216,12 +232,11 @@ void thread_lock_flags_(struct thread *, int, const char *, int); /* Lock a normal mutex. 
*/ #define __mtx_lock(mp, tid, opts, file, line) do { \ uintptr_t _tid = (uintptr_t)(tid); \ + uintptr_t _v = MTX_UNOWNED; \ \ - if (((mp)->mtx_lock != MTX_UNOWNED || !_mtx_obtain_lock((mp), _tid)))\ - _mtx_lock_sleep((mp), _tid, (opts), (file), (line)); \ - else \ - LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(adaptive__acquire, \ - mp, 0, 0, file, line); \ + if (__predict_false(LOCKSTAT_PROFILE_ENABLED(adaptive__acquire) ||\ + !_mtx_obtain_lock_fetch((mp), &_v, _tid))) \ + _mtx_lock_sleep((mp), _v, _tid, (opts), (file), (line));\ } while (0) /* @@ -233,14 +248,12 @@ void thread_lock_flags_(struct thread *, int, const char *, int); #ifdef SMP #define __mtx_lock_spin(mp, tid, opts, file, line) do { \ uintptr_t _tid = (uintptr_t)(tid); \ + uintptr_t _v = MTX_UNOWNED; \ \ spinlock_enter(); \ - if (((mp)->mtx_lock != MTX_UNOWNED || !_mtx_obtain_lock((mp), _tid))) {\ - if ((mp)->mtx_lock == _tid) \ - (mp)->mtx_recurse++; \ - else \ - _mtx_lock_spin((mp), _tid, (opts), (file), (line)); \ - } else \ + if (!_mtx_obtain_lock_fetch((mp), &_v, _tid)) \ + _mtx_lock_spin((mp), _v, _tid, (opts), (file), (line)); \ + else \ LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(spin__acquire, \ mp, 0, 0, file, line); \ } while (0) @@ -291,9 +304,8 @@ void thread_lock_flags_(struct thread *, int, const char *, int); #define __mtx_unlock(mp, tid, opts, file, line) do { \ uintptr_t _tid = (uintptr_t)(tid); \ \ - if ((mp)->mtx_recurse == 0) \ - LOCKSTAT_PROFILE_RELEASE_LOCK(adaptive__release, mp); \ - if ((mp)->mtx_lock != _tid || !_mtx_release_lock((mp), _tid)) \ + if (__predict_false(LOCKSTAT_PROFILE_ENABLED(adaptive__release) ||\ + !_mtx_release_lock((mp), _tid))) \ _mtx_unlock_sleep((mp), (opts), (file), (line)); \ } while (0) diff --git a/freebsd/sys/sys/nlist_aout.h b/freebsd/sys/sys/nlist_aout.h index cb3dd859..79260ad6 100644 --- a/freebsd/sys/sys/nlist_aout.h +++ b/freebsd/sys/sys/nlist_aout.h @@ -15,7 +15,7 @@ * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/sys/nv.h b/freebsd/sys/sys/nv.h new file mode 100644 index 00000000..fcea2b3e --- /dev/null +++ b/freebsd/sys/sys/nv.h @@ -0,0 +1,246 @@ +/*- + * Copyright (c) 2009-2013 The FreeBSD Foundation + * Copyright (c) 2013-2015 Mariusz Zaborski + * All rights reserved. + * + * This software was developed by Pawel Jakub Dawidek under sponsorship from + * the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _NV_H_ +#define _NV_H_ + +#include <sys/cdefs.h> + +#ifndef _KERNEL +#include <stdarg.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#endif + +#ifndef _NVLIST_T_DECLARED +#define _NVLIST_T_DECLARED +struct nvlist; + +typedef struct nvlist nvlist_t; +#endif + +#define NV_NAME_MAX 2048 + +#define NV_TYPE_NONE 0 + +#define NV_TYPE_NULL 1 +#define NV_TYPE_BOOL 2 +#define NV_TYPE_NUMBER 3 +#define NV_TYPE_STRING 4 +#define NV_TYPE_NVLIST 5 +#define NV_TYPE_DESCRIPTOR 6 +#define NV_TYPE_BINARY 7 +#define NV_TYPE_BOOL_ARRAY 8 +#define NV_TYPE_NUMBER_ARRAY 9 +#define NV_TYPE_STRING_ARRAY 10 +#define NV_TYPE_NVLIST_ARRAY 11 +#define NV_TYPE_DESCRIPTOR_ARRAY 12 + +/* + * Perform case-insensitive lookups of provided names. + */ +#define NV_FLAG_IGNORE_CASE 0x01 +/* + * Names don't have to be unique.
+ */ +#define NV_FLAG_NO_UNIQUE 0x02 + +#if defined(_KERNEL) && defined(MALLOC_DECLARE) +MALLOC_DECLARE(M_NVLIST); +#endif + +__BEGIN_DECLS + +nvlist_t *nvlist_create(int flags); +void nvlist_destroy(nvlist_t *nvl); +int nvlist_error(const nvlist_t *nvl); +bool nvlist_empty(const nvlist_t *nvl); +int nvlist_flags(const nvlist_t *nvl); +void nvlist_set_error(nvlist_t *nvl, int error); + +nvlist_t *nvlist_clone(const nvlist_t *nvl); + +#ifndef _KERNEL +void nvlist_dump(const nvlist_t *nvl, int fd); +void nvlist_fdump(const nvlist_t *nvl, FILE *fp); +#endif + +size_t nvlist_size(const nvlist_t *nvl); +void *nvlist_pack(const nvlist_t *nvl, size_t *sizep); +nvlist_t *nvlist_unpack(const void *buf, size_t size, int flags); + +int nvlist_send(int sock, const nvlist_t *nvl); +nvlist_t *nvlist_recv(int sock, int flags); +nvlist_t *nvlist_xfer(int sock, nvlist_t *nvl, int flags); + +const char *nvlist_next(const nvlist_t *nvl, int *typep, void **cookiep); + +const nvlist_t *nvlist_get_parent(const nvlist_t *nvl, void **cookiep); + +const nvlist_t *nvlist_get_array_next(const nvlist_t *nvl); +bool nvlist_in_array(const nvlist_t *nvl); + +const nvlist_t *nvlist_get_pararr(const nvlist_t *nvl, void **cookiep); + +/* + * The nvlist_exists functions check if the given name (optionally of the given + * type) exists on nvlist. 
+ */ + +bool nvlist_exists(const nvlist_t *nvl, const char *name); +bool nvlist_exists_type(const nvlist_t *nvl, const char *name, int type); + +bool nvlist_exists_null(const nvlist_t *nvl, const char *name); +bool nvlist_exists_bool(const nvlist_t *nvl, const char *name); +bool nvlist_exists_number(const nvlist_t *nvl, const char *name); +bool nvlist_exists_string(const nvlist_t *nvl, const char *name); +bool nvlist_exists_nvlist(const nvlist_t *nvl, const char *name); +bool nvlist_exists_binary(const nvlist_t *nvl, const char *name); +bool nvlist_exists_bool_array(const nvlist_t *nvl, const char *name); +bool nvlist_exists_number_array(const nvlist_t *nvl, const char *name); +bool nvlist_exists_string_array(const nvlist_t *nvl, const char *name); +bool nvlist_exists_nvlist_array(const nvlist_t *nvl, const char *name); +#ifndef _KERNEL +bool nvlist_exists_descriptor(const nvlist_t *nvl, const char *name); +bool nvlist_exists_descriptor_array(const nvlist_t *nvl, const char *name); +#endif + +/* + * The nvlist_add functions add the given name/value pair. + * If a pointer is provided, nvlist_add will internally allocate memory for the + * given data (in other words it won't consume provided buffer). + */ + +void nvlist_add_null(nvlist_t *nvl, const char *name); +void nvlist_add_bool(nvlist_t *nvl, const char *name, bool value); +void nvlist_add_number(nvlist_t *nvl, const char *name, uint64_t value); +void nvlist_add_string(nvlist_t *nvl, const char *name, const char *value); +void nvlist_add_stringf(nvlist_t *nvl, const char *name, const char *valuefmt, ...) 
__printflike(3, 4); +#if !defined(_KERNEL) || defined(_VA_LIST_DECLARED) +void nvlist_add_stringv(nvlist_t *nvl, const char *name, const char *valuefmt, va_list valueap) __printflike(3, 0); +#endif +void nvlist_add_nvlist(nvlist_t *nvl, const char *name, const nvlist_t *value); +void nvlist_add_binary(nvlist_t *nvl, const char *name, const void *value, size_t size); +void nvlist_add_bool_array(nvlist_t *nvl, const char *name, const bool *value, size_t nitems); +void nvlist_add_number_array(nvlist_t *nvl, const char *name, const uint64_t *value, size_t nitems); +void nvlist_add_string_array(nvlist_t *nvl, const char *name, const char * const *value, size_t nitems); +void nvlist_add_nvlist_array(nvlist_t *nvl, const char *name, const nvlist_t * const *value, size_t nitems); +#ifndef _KERNEL +void nvlist_add_descriptor(nvlist_t *nvl, const char *name, int value); +void nvlist_add_descriptor_array(nvlist_t *nvl, const char *name, const int *value, size_t nitems); +#endif + +/* + * The nvlist_move functions add the given name/value pair. + * The functions consumes provided buffer. + */ + +void nvlist_move_string(nvlist_t *nvl, const char *name, char *value); +void nvlist_move_nvlist(nvlist_t *nvl, const char *name, nvlist_t *value); +void nvlist_move_binary(nvlist_t *nvl, const char *name, void *value, size_t size); +void nvlist_move_bool_array(nvlist_t *nvl, const char *name, bool *value, size_t nitems); +void nvlist_move_string_array(nvlist_t *nvl, const char *name, char **value, size_t nitems); +void nvlist_move_nvlist_array(nvlist_t *nvl, const char *name, nvlist_t **value, size_t nitems); +void nvlist_move_number_array(nvlist_t *nvl, const char *name, uint64_t *value, size_t nitems); +#ifndef _KERNEL +void nvlist_move_descriptor(nvlist_t *nvl, const char *name, int value); +void nvlist_move_descriptor_array(nvlist_t *nvl, const char *name, int *value, size_t nitems); +#endif + +/* + * The nvlist_get functions returns value associated with the given name. 
+ * If it returns a pointer, the pointer represents internal buffer and should + * not be freed by the caller. + */ + +bool nvlist_get_bool(const nvlist_t *nvl, const char *name); +uint64_t nvlist_get_number(const nvlist_t *nvl, const char *name); +const char *nvlist_get_string(const nvlist_t *nvl, const char *name); +const nvlist_t *nvlist_get_nvlist(const nvlist_t *nvl, const char *name); +const void *nvlist_get_binary(const nvlist_t *nvl, const char *name, size_t *sizep); +const bool *nvlist_get_bool_array(const nvlist_t *nvl, const char *name, size_t *nitemsp); +const uint64_t *nvlist_get_number_array(const nvlist_t *nvl, const char *name, size_t *nitemsp); +const char * const *nvlist_get_string_array(const nvlist_t *nvl, const char *name, size_t *nitemsp); +const nvlist_t * const *nvlist_get_nvlist_array(const nvlist_t *nvl, const char *name, size_t *nitemsp); +#ifndef _KERNEL +int nvlist_get_descriptor(const nvlist_t *nvl, const char *name); +const int *nvlist_get_descriptor_array(const nvlist_t *nvl, const char *name, size_t *nitemsp); +#endif + +/* + * The nvlist_take functions returns value associated with the given name and + * remove the given entry from the nvlist. + * The caller is responsible for freeing received data. 
+ */ + +bool nvlist_take_bool(nvlist_t *nvl, const char *name); +uint64_t nvlist_take_number(nvlist_t *nvl, const char *name); +char *nvlist_take_string(nvlist_t *nvl, const char *name); +nvlist_t *nvlist_take_nvlist(nvlist_t *nvl, const char *name); +void *nvlist_take_binary(nvlist_t *nvl, const char *name, size_t *sizep); +bool *nvlist_take_bool_array(nvlist_t *nvl, const char *name, size_t *nitemsp); +uint64_t *nvlist_take_number_array(nvlist_t *nvl, const char *name, size_t *nitemsp); +char **nvlist_take_string_array(nvlist_t *nvl, const char *name, size_t *nitemsp); +nvlist_t **nvlist_take_nvlist_array(nvlist_t *nvl, const char *name, size_t *nitemsp); +#ifndef _KERNEL +int nvlist_take_descriptor(nvlist_t *nvl, const char *name); +int *nvlist_take_descriptor_array(nvlist_t *nvl, const char *name, size_t *nitemsp); +#endif + +/* + * The nvlist_free functions removes the given name/value pair from the nvlist + * and frees memory associated with it. + */ + +void nvlist_free(nvlist_t *nvl, const char *name); +void nvlist_free_type(nvlist_t *nvl, const char *name, int type); + +void nvlist_free_null(nvlist_t *nvl, const char *name); +void nvlist_free_bool(nvlist_t *nvl, const char *name); +void nvlist_free_number(nvlist_t *nvl, const char *name); +void nvlist_free_string(nvlist_t *nvl, const char *name); +void nvlist_free_nvlist(nvlist_t *nvl, const char *name); +void nvlist_free_binary(nvlist_t *nvl, const char *name); +void nvlist_free_bool_array(nvlist_t *nvl, const char *name); +void nvlist_free_number_array(nvlist_t *nvl, const char *name); +void nvlist_free_string_array(nvlist_t *nvl, const char *name); +void nvlist_free_nvlist_array(nvlist_t *nvl, const char *name); +void nvlist_free_binary_array(nvlist_t *nvl, const char *name); +#ifndef _KERNEL +void nvlist_free_descriptor(nvlist_t *nvl, const char *name); +void nvlist_free_descriptor_array(nvlist_t *nvl, const char *name); +#endif + +__END_DECLS + +#endif /* !_NV_H_ */ diff --git a/freebsd/sys/sys/pcpu.h 
b/freebsd/sys/sys/pcpu.h index 2d3f3411..8e246004 100644 --- a/freebsd/sys/sys/pcpu.h +++ b/freebsd/sys/sys/pcpu.h @@ -11,7 +11,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the author nor the names of any co-contributors + * 3. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/sys/pipe.h b/freebsd/sys/sys/pipe.h index d596b3bb..d37c0986 100755 --- a/freebsd/sys/sys/pipe.h +++ b/freebsd/sys/sys/pipe.h @@ -25,7 +25,7 @@ #define _SYS_PIPE_H_ #ifndef _KERNEL -#error "no user-servicable parts inside" +#error "no user-serviceable parts inside" #endif /* diff --git a/freebsd/sys/sys/proc.h b/freebsd/sys/sys/proc.h index ee43997f..7af3dc2e 100644 --- a/freebsd/sys/sys/proc.h +++ b/freebsd/sys/sys/proc.h @@ -15,7 +15,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * @@ -148,6 +148,7 @@ struct pargs { * o - ktrace lock * q - td_contested lock * r - p_peers lock + * s - see sleepq_switch(), sleeping_on_old_rtc(), and sleep(9) * t - thread lock * u - process stat lock * w - process timer lock @@ -302,7 +303,7 @@ struct thread { char td_name[MAXCOMLEN + 1]; /* (*) Thread name. 
*/ struct file *td_fpop; /* (k) file referencing cdev under op */ int td_dbgflags; /* (c) Userland debugger flags */ - struct ksiginfo td_dbgksi; /* (c) ksi reflected to debugger. */ + siginfo_t td_si; /* (c) For debugger or core file */ int td_ng_outbound; /* (k) Thread entered ng from above. */ struct osd td_osd; /* (k) Object specific data. */ struct vm_map_entry *td_map_def_user; /* (k) Deferred entries. */ @@ -312,6 +313,7 @@ struct thread { int td_dom_rr_idx; /* (k) RR Numa domain selection. */ void *td_su; /* (k) FFS SU private */ sbintime_t td_sleeptimo; /* (t) Sleep timeout. */ + int td_rtcgen; /* (s) rtc_generation of abs. sleep */ #define td_endzero td_sigmask /* Copied during fork1() or create_thread(). */ @@ -344,7 +346,7 @@ struct thread { } td_state; /* (t) thread state */ union { register_t tdu_retval[2]; - off_t tdu_off; + off_t tdu_off; } td_uretoff; /* (k) Syscall aux returns. */ #else /* __rtems__ */ register_t td_retval[2]; /* (k) Syscall aux returns. */ @@ -372,6 +374,7 @@ struct thread { void *td_emuldata; /* Emulator state data */ int td_lastcpu; /* (t) Last cpu we were on. */ int td_oncpu; /* (t) Which cpu we are on. */ + void *td_lkpi_task; /* LinuxKPI task struct pointer */ #endif /* __rtems__ */ }; @@ -527,6 +530,12 @@ do { \ #define TD_ON_UPILOCK(td) ((td)->td_flags & TDF_UPIBLOCKED) #define TD_IS_IDLETHREAD(td) ((td)->td_flags & TDF_IDLETD) +#define KTDSTATE(td) \ + (((td)->td_inhibitors & TDI_SLEEPING) != 0 ? "sleep" : \ + ((td)->td_inhibitors & TDI_SUSPENDED) != 0 ? "suspended" : \ + ((td)->td_inhibitors & TDI_SWAPPED) != 0 ? "swapped" : \ + ((td)->td_inhibitors & TDI_LOCK) != 0 ? "blocked" : \ + ((td)->td_inhibitors & TDI_IWAIT) != 0 ? "iwait" : "yielding") #define TD_SET_INHIB(td, inhib) do { \ (td)->td_state = TDS_INHIBITED; \ @@ -660,8 +669,11 @@ struct proc { our subtree. */ u_int p_xexit; /* (c) Exit code. */ u_int p_xsig; /* (c) Stop/kill sig. 
*/ + uint16_t p_elf_machine; /* (x) ELF machine type */ + uint64_t p_elf_flags; /* (x) ELF flags */ + /* End area that is copied on creation. */ -#define p_endcopy p_xsig +#define p_endcopy p_elf_flags struct pgrp *p_pgrp; /* (c + e) Pointer to process group. */ struct knlist *p_klist; /* (c) Knotes attached to this proc. */ int p_numthreads; /* (c) Number of threads. */ @@ -1175,6 +1187,15 @@ td_get_sched(struct thread *td) return ((struct td_sched *)&td[1]); } + +extern void (*softdep_ast_cleanup)(struct thread *); +static __inline void +td_softdep_cleanup(struct thread *td) +{ + + if (td->td_su != NULL && softdep_ast_cleanup != NULL) + softdep_ast_cleanup(td); +} #endif /* __rtems__ */ #endif /* _KERNEL */ diff --git a/freebsd/sys/sys/protosw.h b/freebsd/sys/sys/protosw.h index 896ec253..33cc2074 100644 --- a/freebsd/sys/sys/protosw.h +++ b/freebsd/sys/sys/protosw.h @@ -10,7 +10,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/sys/reboot.h b/freebsd/sys/sys/reboot.h index ebe688e8..ec2a5705 100644 --- a/freebsd/sys/sys/reboot.h +++ b/freebsd/sys/sys/reboot.h @@ -10,7 +10,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/sys/resourcevar.h b/freebsd/sys/sys/resourcevar.h index 1d290aaa..d3c50830 100644 --- a/freebsd/sys/sys/resourcevar.h +++ b/freebsd/sys/sys/resourcevar.h @@ -10,7 +10,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/sys/rwlock.h b/freebsd/sys/sys/rwlock.h index e0003840..8988ecbd 100644 --- a/freebsd/sys/sys/rwlock.h +++ b/freebsd/sys/sys/rwlock.h @@ -61,13 +61,14 @@ #define RW_LOCK_READ_WAITERS 0x02 #define RW_LOCK_WRITE_WAITERS 0x04 #define RW_LOCK_WRITE_SPINNER 0x08 +#define RW_LOCK_WRITER_RECURSED 0x10 #define RW_LOCK_FLAGMASK \ (RW_LOCK_READ | RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS | \ - RW_LOCK_WRITE_SPINNER) + RW_LOCK_WRITE_SPINNER | RW_LOCK_WRITER_RECURSED) #define RW_LOCK_WAITERS (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS) #define RW_OWNER(x) ((x) & ~RW_LOCK_FLAGMASK) -#define RW_READERS_SHIFT 4 +#define RW_READERS_SHIFT 5 #define RW_READERS(x) (RW_OWNER((x)) >> RW_READERS_SHIFT) #define RW_READERS_LOCK(x) ((x) << RW_READERS_SHIFT | RW_LOCK_READ) #define RW_ONE_READER (1 << RW_READERS_SHIFT) @@ -79,12 +80,17 @@ #define rw_recurse lock_object.lo_data +#define RW_READ_VALUE(x) ((x)->rw_lock) + /* Very simple operations on rw_lock. */ /* Try to obtain a write lock once. 
*/ #define _rw_write_lock(rw, tid) \ atomic_cmpset_acq_ptr(&(rw)->rw_lock, RW_UNLOCKED, (tid)) +#define _rw_write_lock_fetch(rw, vp, tid) \ + atomic_fcmpset_acq_ptr(&(rw)->rw_lock, vp, (tid)) + /* Release a write lock quickly if there are no waiters. */ #define _rw_write_unlock(rw, tid) \ atomic_cmpset_rel_ptr(&(rw)->rw_lock, (tid), RW_UNLOCKED) @@ -99,26 +105,20 @@ /* Acquire a write lock. */ #define __rw_wlock(rw, tid, file, line) do { \ uintptr_t _tid = (uintptr_t)(tid); \ + uintptr_t _v = RW_UNLOCKED; \ \ - if ((rw)->rw_lock != RW_UNLOCKED || !_rw_write_lock((rw), _tid))\ - _rw_wlock_hard((rw), _tid, (file), (line)); \ - else \ - LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, \ - 0, 0, file, line, LOCKSTAT_WRITER); \ + if (__predict_false(LOCKSTAT_PROFILE_ENABLED(rw__acquire) || \ + !_rw_write_lock_fetch((rw), &_v, _tid))) \ + _rw_wlock_hard((rw), _v, _tid, (file), (line)); \ } while (0) /* Release a write lock. */ #define __rw_wunlock(rw, tid, file, line) do { \ uintptr_t _tid = (uintptr_t)(tid); \ \ - if ((rw)->rw_recurse) \ - (rw)->rw_recurse--; \ - else { \ - LOCKSTAT_PROFILE_RELEASE_RWLOCK(rw__release, rw, \ - LOCKSTAT_WRITER); \ - if ((rw)->rw_lock != _tid || !_rw_write_unlock((rw), _tid))\ - _rw_wunlock_hard((rw), _tid, (file), (line)); \ - } \ + if (__predict_false(LOCKSTAT_PROFILE_ENABLED(rw__release) || \ + !_rw_write_unlock((rw), _tid))) \ + _rw_wunlock_hard((rw), _tid, (file), (line)); \ } while (0) #endif /* __rtems__ */ @@ -139,8 +139,8 @@ void _rw_wunlock_cookie(volatile uintptr_t *c, const char *file, int line); void __rw_rlock(volatile uintptr_t *c, const char *file, int line); int __rw_try_rlock(volatile uintptr_t *c, const char *file, int line); void _rw_runlock_cookie(volatile uintptr_t *c, const char *file, int line); -void __rw_wlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file, - int line); +void __rw_wlock_hard(volatile uintptr_t *c, uintptr_t v, uintptr_t tid, + const char *file, int line); void 
__rw_wunlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file, int line); int __rw_try_upgrade(volatile uintptr_t *c, const char *file, int line); @@ -192,8 +192,8 @@ void _rw_downgrade(struct rwlock *rw, const char *file, int line); __rw_try_rlock(&(rw)->rw_lock, f, l) #define _rw_runlock(rw, f, l) \ _rw_runlock_cookie(&(rw)->rw_lock, f, l) -#define _rw_wlock_hard(rw, t, f, l) \ - __rw_wlock_hard(&(rw)->rw_lock, t, f, l) +#define _rw_wlock_hard(rw, v, t, f, l) \ + __rw_wlock_hard(&(rw)->rw_lock, v, t, f, l) #define _rw_wunlock_hard(rw, t, f, l) \ __rw_wunlock_hard(&(rw)->rw_lock, t, f, l) #define _rw_try_upgrade(rw, f, l) \ diff --git a/freebsd/sys/sys/sbuf.h b/freebsd/sys/sys/sbuf.h index 580cbd2e..c05bafd1 100644 --- a/freebsd/sys/sys/sbuf.h +++ b/freebsd/sys/sys/sbuf.h @@ -99,6 +99,7 @@ void sbuf_start_section(struct sbuf *, ssize_t *); ssize_t sbuf_end_section(struct sbuf *, ssize_t, size_t, int); void sbuf_hexdump(struct sbuf *, const void *, int, const char *, int); +void sbuf_putbuf(struct sbuf *); #ifdef _KERNEL struct uio; diff --git a/freebsd/sys/sys/sdt.h b/freebsd/sys/sys/sdt.h index 25423d76..c680ea85 100644 --- a/freebsd/sys/sys/sdt.h +++ b/freebsd/sys/sys/sdt.h @@ -86,6 +86,7 @@ #define SDT_PROVIDER_DECLARE(prov) #define SDT_PROBE_DEFINE(prov, mod, func, name) #define SDT_PROBE_DECLARE(prov, mod, func, name) +#define SDT_PROBE_ENABLED(prov, mod, func, name) 0 #define SDT_PROBE(prov, mod, func, name, arg0, arg1, arg2, arg3, arg4) #define SDT_PROBE_ARGTYPE(prov, mod, func, name, num, type, xtype) @@ -160,6 +161,9 @@ SET_DECLARE(sdt_argtypes_set, struct sdt_argtype); #define SDT_PROBE_DECLARE(prov, mod, func, name) \ extern struct sdt_probe sdt_##prov##_##mod##_##func##_##name[1] +#define SDT_PROBE_ENABLED(prov, mod, func, name) \ + __predict_false((sdt_##prov##_##mod##_##func##_##name->id)) + #define SDT_PROBE(prov, mod, func, name, arg0, arg1, arg2, arg3, arg4) do { \ if (__predict_false(sdt_##prov##_##mod##_##func##_##name->id)) \ 
(*sdt_probe_func)(sdt_##prov##_##mod##_##func##_##name->id, \ diff --git a/freebsd/sys/sys/selinfo.h b/freebsd/sys/sys/selinfo.h index 590d184a..85de231e 100644 --- a/freebsd/sys/sys/selinfo.h +++ b/freebsd/sys/sys/selinfo.h @@ -10,7 +10,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/sys/sigio.h b/freebsd/sys/sys/sigio.h index 74047b78..e941ae96 100644 --- a/freebsd/sys/sys/sigio.h +++ b/freebsd/sys/sys/sigio.h @@ -10,7 +10,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/sys/signalvar.h b/freebsd/sys/sys/signalvar.h index a2a1d0d8..22f9ef4a 100644 --- a/freebsd/sys/sys/signalvar.h +++ b/freebsd/sys/sys/signalvar.h @@ -10,7 +10,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * @@ -236,15 +236,17 @@ typedef struct ksiginfo { #define KSI_TRAP 0x01 /* Generated by trap. */ #define KSI_EXT 0x02 /* Externally managed ksi. */ #define KSI_INS 0x04 /* Directly insert ksi, not the copy */ -#define KSI_SIGQ 0x08 /* Generated by sigqueue, might ret EGAIN. */ +#define KSI_SIGQ 0x08 /* Generated by sigqueue, might ret EAGAIN. */ #define KSI_HEAD 0x10 /* Insert into head, not tail. */ -#define KSI_COPYMASK (KSI_TRAP|KSI_SIGQ) +#define KSI_PTRACE 0x20 /* Generated by ptrace. */ +#define KSI_COPYMASK (KSI_TRAP | KSI_SIGQ | KSI_PTRACE) #define KSI_ONQ(ksi) ((ksi)->ksi_sigq != NULL) typedef struct sigqueue { sigset_t sq_signals; /* All pending signals. */ sigset_t sq_kill; /* Legacy depth 1 queue. */ + sigset_t sq_ptrace; /* Depth 1 queue for ptrace(2). */ TAILQ_HEAD(, ksiginfo) sq_list;/* Queued signal info. 
*/ struct proc *sq_proc; int sq_flags; @@ -371,7 +373,7 @@ void pgsigio(struct sigio **sigiop, int sig, int checkctty); void pgsignal(struct pgrp *pgrp, int sig, int checkctty, ksiginfo_t *ksi); int postsig(int sig); void kern_psignal(struct proc *p, int sig); -int ptracestop(struct thread *td, int sig); +int ptracestop(struct thread *td, int sig, ksiginfo_t *si); void sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *retmask); struct sigacts *sigacts_alloc(void); void sigacts_copy(struct sigacts *dest, struct sigacts *src); diff --git a/freebsd/sys/sys/sleepqueue.h b/freebsd/sys/sys/sleepqueue.h index d59dc7e5..30a18933 100644 --- a/freebsd/sys/sys/sleepqueue.h +++ b/freebsd/sys/sys/sleepqueue.h @@ -90,11 +90,14 @@ void sleepq_add(void *wchan, struct lock_object *lock, const char *wmesg, int flags, int queue); struct sleepqueue *sleepq_alloc(void); int sleepq_broadcast(void *wchan, int flags, int pri, int queue); +void sleepq_chains_remove_matching(bool (*matches)(struct thread *)); void sleepq_free(struct sleepqueue *sq); void sleepq_lock(void *wchan); struct sleepqueue *sleepq_lookup(void *wchan); void sleepq_release(void *wchan); void sleepq_remove(struct thread *td, void *wchan); +int sleepq_remove_matching(struct sleepqueue *sq, int queue, + bool (*matches)(struct thread *), int pri); int sleepq_signal(void *wchan, int flags, int pri, int queue); void sleepq_set_timeout_sbt(void *wchan, sbintime_t sbt, sbintime_t pr, int flags); diff --git a/freebsd/sys/sys/slicer.h b/freebsd/sys/sys/slicer.h index 9bf8748f..53f680d8 100644 --- a/freebsd/sys/sys/slicer.h +++ b/freebsd/sys/sys/slicer.h @@ -27,26 +27,38 @@ */ #ifndef _FLASH_SLICER_H_ -#define _FLASH_SLICER_H_ +#define _FLASH_SLICER_H_ #include -#define FLASH_SLICES_MAX_NUM 8 -#define FLASH_SLICES_MAX_NAME_LEN (32 + 1) +#define FLASH_SLICES_MAX_NUM 8 +#define FLASH_SLICES_MAX_NAME_LEN (32 + 1) #define FLASH_SLICES_FLAG_NONE 0 #define FLASH_SLICES_FLAG_RO 1 /* Read only */ +#define FLASH_SLICES_FMT "%ss.%s" + 
struct flash_slice { off_t base; off_t size; - char *label; + const char *label; unsigned int flags; }; #ifdef _KERNEL -int fdt_flash_fill_slices(device_t, struct flash_slice *, int *) __weak_symbol; -void flash_register_slicer(int (*)(device_t, struct flash_slice *, int *)); + +typedef int (*flash_slicer_t)(device_t dev, const char *provider, + struct flash_slice *slices, int *slices_num); + +#define FLASH_SLICES_TYPE_NAND 0 +#define FLASH_SLICES_TYPE_CFI 1 +#define FLASH_SLICES_TYPE_SPI 2 +#define FLASH_SLICES_TYPE_MMC 3 + +/* Use NULL for deregistering a slicer */ +void flash_register_slicer(flash_slicer_t slicer, u_int type, bool force); + #endif /* _KERNEL */ #endif /* _FLASH_SLICER_H_ */ diff --git a/freebsd/sys/sys/sockbuf.h b/freebsd/sys/sys/sockbuf.h index 2c03b3ed..b1ebe62f 100644 --- a/freebsd/sys/sys/sockbuf.h +++ b/freebsd/sys/sys/sockbuf.h @@ -10,7 +10,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/sys/socket.h b/freebsd/sys/sys/socket.h index 9429f5a1..0ef59dc4 100644 --- a/freebsd/sys/sys/socket.h +++ b/freebsd/sys/sys/socket.h @@ -10,7 +10,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * @@ -158,6 +158,17 @@ typedef __uintptr_t uintptr_t; #define SO_USER_COOKIE 0x1015 /* user cookie (dummynet etc.) */ #define SO_PROTOCOL 0x1016 /* get socket protocol (Linux name) */ #define SO_PROTOTYPE SO_PROTOCOL /* alias for SO_PROTOCOL (SunOS name) */ +#define SO_TS_CLOCK 0x1017 /* clock type used for SO_TIMESTAMP */ +#define SO_MAX_PACING_RATE 0x1018 /* socket's max TX pacing rate (Linux name) */ +#endif + +#if __BSD_VISIBLE +#define SO_TS_REALTIME_MICRO 0 /* microsecond resolution, realtime */ +#define SO_TS_BINTIME 1 /* sub-nanosecond resolution, realtime */ +#define SO_TS_REALTIME 2 /* nanosecond resolution, realtime */ +#define SO_TS_MONOTONIC 3 /* nanosecond resolution, monotonic */ +#define SO_TS_DEFAULT SO_TS_REALTIME_MICRO +#define SO_TS_CLOCK_MAX SO_TS_MONOTONIC #endif /* @@ -414,28 +425,36 @@ struct msghdr { int msg_flags; /* flags on received message */ }; -#define MSG_OOB 0x1 /* process out-of-band data */ -#define MSG_PEEK 0x2 /* peek at incoming message */ -#define MSG_DONTROUTE 0x4 /* send without using routing tables */ -#define MSG_EOR 0x8 /* data completes record */ -#define MSG_TRUNC 0x10 /* data discarded before delivery */ -#define MSG_CTRUNC 0x20 /* control data lost before delivery */ -#define MSG_WAITALL 0x40 /* wait for full request or error */ +#define MSG_OOB 0x00000001 /* process out-of-band data */ +#define MSG_PEEK 0x00000002 /* peek at incoming message */ +#define MSG_DONTROUTE 0x00000004 /* send without using routing tables */ +#define MSG_EOR 0x00000008 /* data completes record */ +#define MSG_TRUNC 0x00000010 /* data discarded before delivery */ +#define MSG_CTRUNC 0x00000020 /* control data lost before delivery */ +#define MSG_WAITALL 0x00000040 /* wait for full request or error */ +#if __BSD_VISIBLE +#define MSG_DONTWAIT 0x00000080 /* this 
message should be nonblocking */ +#define MSG_EOF 0x00000100 /* data completes connection */ +/* 0x00000200 unused */ +/* 0x00000400 unused */ +/* 0x00000800 unused */ +/* 0x00001000 unused */ +#define MSG_NOTIFICATION 0x00002000 /* SCTP notification */ +#define MSG_NBIO 0x00004000 /* FIONBIO mode, used by fifofs */ +#define MSG_COMPAT 0x00008000 /* used in sendit() */ +#endif +#ifdef _KERNEL +#define MSG_SOCALLBCK 0x00010000 /* for use by socket callbacks - soreceive (TCP) */ +#endif #if __POSIX_VISIBLE >= 200809 -#define MSG_NOSIGNAL 0x20000 /* do not generate SIGPIPE on EOF */ +#define MSG_NOSIGNAL 0x00020000 /* do not generate SIGPIPE on EOF */ #endif #if __BSD_VISIBLE -#define MSG_DONTWAIT 0x80 /* this message should be nonblocking */ -#define MSG_EOF 0x100 /* data completes connection */ -#define MSG_NOTIFICATION 0x2000 /* SCTP notification */ -#define MSG_NBIO 0x4000 /* FIONBIO mode, used by fifofs */ -#define MSG_COMPAT 0x8000 /* used in sendit() */ -#define MSG_CMSG_CLOEXEC 0x40000 /* make received fds close-on-exec */ -#define MSG_WAITFORONE 0x80000 /* for recvmmsg() */ +#define MSG_CMSG_CLOEXEC 0x00040000 /* make received fds close-on-exec */ +#define MSG_WAITFORONE 0x00080000 /* for recvmmsg() */ #endif #ifdef _KERNEL -#define MSG_SOCALLBCK 0x10000 /* for use by socket callbacks - soreceive (TCP) */ -#define MSG_MORETOCOME 0x20000 /* additional data pending */ +#define MSG_MORETOCOME 0x00100000 /* additional data pending */ #endif /* @@ -534,6 +553,8 @@ struct sockcred { #define SCM_TIMESTAMP 0x02 /* timestamp (struct timeval) */ #define SCM_CREDS 0x03 /* process creds (struct cmsgcred) */ #define SCM_BINTIME 0x04 /* timestamp (struct bintime) */ +#define SCM_REALTIME 0x05 /* timestamp (struct timespec) */ +#define SCM_MONOTONIC 0x06 /* timestamp (struct timespec) */ #endif #if __BSD_VISIBLE diff --git a/freebsd/sys/sys/socketvar.h b/freebsd/sys/sys/socketvar.h index 1e599a66..dd1ec474 100644 --- a/freebsd/sys/sys/socketvar.h +++ 
b/freebsd/sys/sys/socketvar.h @@ -10,7 +10,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * @@ -127,8 +127,11 @@ struct socket { int so_fibnum; /* routing domain for this socket */ uint32_t so_user_cookie; - void *so_pspare[2]; /* packet pacing / general use */ - int so_ispare[2]; /* packet pacing / general use */ + int so_ts_clock; /* type of the clock used for timestamps */ + uint32_t so_max_pacing_rate; /* (f) TX rate limit in bytes/s */ + + void *so_pspare[2]; /* general use */ + int so_ispare[2]; /* general use */ }; /* diff --git a/freebsd/sys/sys/sockopt.h b/freebsd/sys/sys/sockopt.h index 69d6c6dc..4131a5b7 100644 --- a/freebsd/sys/sys/sockopt.h +++ b/freebsd/sys/sys/sockopt.h @@ -10,7 +10,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * @@ -34,7 +34,7 @@ #define _SYS_SOCKOPT_H_ #ifndef _KERNEL -#error "no user-servicable parts inside" +#error "no user-serviceable parts inside" #endif diff --git a/freebsd/sys/sys/sockstate.h b/freebsd/sys/sys/sockstate.h index 52c85b76..9648f5a3 100644 --- a/freebsd/sys/sys/sockstate.h +++ b/freebsd/sys/sys/sockstate.h @@ -10,7 +10,7 @@ * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/sys/stdint.h b/freebsd/sys/sys/stdint.h index 2b253137..4c41ec14 100644 --- a/freebsd/sys/sys/stdint.h +++ b/freebsd/sys/sys/stdint.h @@ -70,4 +70,11 @@ typedef __uint_fast64_t uint_fast64_t; #define WCHAR_MAX __WCHAR_MAX #endif /* __rtems__ */ +#if __EXT1_VISIBLE +/* ISO/IEC 9899:2011 K.3.4.4 */ +#ifndef RSIZE_MAX +#define RSIZE_MAX (SIZE_MAX >> 1) +#endif +#endif /* __EXT1_VISIBLE */ + #endif /* !_SYS_STDINT_H_ */ diff --git a/freebsd/sys/sys/sx.h b/freebsd/sys/sys/sx.h index c285fa77..0c95df16 100644 --- a/freebsd/sys/sys/sx.h +++ b/freebsd/sys/sys/sx.h @@ -94,6 +94,11 @@ #define sx_recurse lock_object.lo_data +#define SX_READ_VALUE(sx) ((sx)->sx_lock) + +#define lv_sx_owner(v) \ + ((v & SX_LOCK_SHARED) ? NULL : (struct thread *)SX_OWNER(v)) + /* * Function prototipes. Routines that start with an underscore are not part * of the public interface and are wrappered with a macro. 
@@ -110,12 +115,10 @@ int _sx_slock(struct sx *sx, int opts, const char *file, int line); int _sx_xlock(struct sx *sx, int opts, const char *file, int line); void _sx_sunlock(struct sx *sx, const char *file, int line); void _sx_xunlock(struct sx *sx, const char *file, int line); -int _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, +int _sx_xlock_hard(struct sx *sx, uintptr_t v, uintptr_t tid, int opts, const char *file, int line); -int _sx_slock_hard(struct sx *sx, int opts, const char *file, int line); void _sx_xunlock_hard(struct sx *sx, uintptr_t tid, const char *file, int line); -void _sx_sunlock_hard(struct sx *sx, const char *file, int line); #if defined(INVARIANTS) || defined(INVARIANT_SUPPORT) void _sx_assert(const struct sx *sx, int what, const char *file, int line); #endif @@ -149,20 +152,19 @@ struct sx_args { * deferred to 'tougher' functions. */ +#if (LOCK_DEBUG == 0) /* Acquire an exclusive lock. */ static __inline int __sx_xlock(struct sx *sx, struct thread *td, int opts, const char *file, int line) { uintptr_t tid = (uintptr_t)td; + uintptr_t v = SX_LOCK_UNLOCKED; int error = 0; - if (sx->sx_lock != SX_LOCK_UNLOCKED || - !atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED, tid)) - error = _sx_xlock_hard(sx, tid, opts, file, line); - else - LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire, sx, - 0, 0, file, line, LOCKSTAT_WRITER); + if (__predict_false(LOCKSTAT_PROFILE_ENABLED(sx__acquire) || + !atomic_fcmpset_acq_ptr(&sx->sx_lock, &v, tid))) + error = _sx_xlock_hard(sx, v, tid, opts, file, line); return (error); } @@ -173,48 +175,11 @@ __sx_xunlock(struct sx *sx, struct thread *td, const char *file, int line) { uintptr_t tid = (uintptr_t)td; - if (sx->sx_recurse == 0) - LOCKSTAT_PROFILE_RELEASE_RWLOCK(sx__release, sx, - LOCKSTAT_WRITER); - if (sx->sx_lock != tid || - !atomic_cmpset_rel_ptr(&sx->sx_lock, tid, SX_LOCK_UNLOCKED)) + if (__predict_false(LOCKSTAT_PROFILE_ENABLED(sx__release) || + !atomic_cmpset_rel_ptr(&sx->sx_lock, tid, 
SX_LOCK_UNLOCKED))) _sx_xunlock_hard(sx, tid, file, line); } - -/* Acquire a shared lock. */ -static __inline int -__sx_slock(struct sx *sx, int opts, const char *file, int line) -{ - uintptr_t x = sx->sx_lock; - int error = 0; - - if (!(x & SX_LOCK_SHARED) || - !atomic_cmpset_acq_ptr(&sx->sx_lock, x, x + SX_ONE_SHARER)) - error = _sx_slock_hard(sx, opts, file, line); - else - LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire, sx, - 0, 0, file, line, LOCKSTAT_READER); - - return (error); -} - -/* - * Release a shared lock. We can just drop a single shared lock so - * long as we aren't trying to drop the last shared lock when other - * threads are waiting for an exclusive lock. This takes advantage of - * the fact that an unlocked lock is encoded as a shared lock with a - * count of 0. - */ -static __inline void -__sx_sunlock(struct sx *sx, const char *file, int line) -{ - uintptr_t x = sx->sx_lock; - - LOCKSTAT_PROFILE_RELEASE_RWLOCK(sx__release, sx, LOCKSTAT_READER); - if (x == (SX_SHARERS_LOCK(1) | SX_LOCK_EXCLUSIVE_WAITERS) || - !atomic_cmpset_rel_ptr(&sx->sx_lock, x, x - SX_ONE_SHARER)) - _sx_sunlock_hard(sx, file, line); -} +#endif #endif /* __rtems__ */ /* @@ -230,12 +195,6 @@ __sx_sunlock(struct sx *sx, const char *file, int line) _sx_xlock((sx), SX_INTERRUPTIBLE, (file), (line)) #define sx_xunlock_(sx, file, line) \ _sx_xunlock((sx), (file), (line)) -#define sx_slock_(sx, file, line) \ - (void)_sx_slock((sx), 0, (file), (line)) -#define sx_slock_sig_(sx, file, line) \ - _sx_slock((sx), SX_INTERRUPTIBLE, (file) , (line)) -#define sx_sunlock_(sx, file, line) \ - _sx_sunlock((sx), (file), (line)) #else #define sx_xlock_(sx, file, line) \ (void)__sx_xlock((sx), curthread, 0, (file), (line)) @@ -243,13 +202,13 @@ __sx_sunlock(struct sx *sx, const char *file, int line) __sx_xlock((sx), curthread, SX_INTERRUPTIBLE, (file), (line)) #define sx_xunlock_(sx, file, line) \ __sx_xunlock((sx), curthread, (file), (line)) +#endif /* LOCK_DEBUG > 0 || SX_NOINLINE */ 
#define sx_slock_(sx, file, line) \ - (void)__sx_slock((sx), 0, (file), (line)) + (void)_sx_slock((sx), 0, (file), (line)) #define sx_slock_sig_(sx, file, line) \ - __sx_slock((sx), SX_INTERRUPTIBLE, (file), (line)) + _sx_slock((sx), SX_INTERRUPTIBLE, (file) , (line)) #define sx_sunlock_(sx, file, line) \ - __sx_sunlock((sx), (file), (line)) -#endif /* LOCK_DEBUG > 0 || SX_NOINLINE */ + _sx_sunlock((sx), (file), (line)) #define sx_try_slock(sx) sx_try_slock_((sx), LOCK_FILE, LOCK_LINE) #define sx_try_xlock(sx) sx_try_xlock_((sx), LOCK_FILE, LOCK_LINE) #define sx_try_upgrade(sx) sx_try_upgrade_((sx), LOCK_FILE, LOCK_LINE) diff --git a/freebsd/sys/sys/sysctl.h b/freebsd/sys/sys/sysctl.h index 988cec2e..71da475b 100644 --- a/freebsd/sys/sys/sysctl.h +++ b/freebsd/sys/sys/sysctl.h @@ -13,7 +13,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * @@ -1048,6 +1048,9 @@ SYSCTL_DECL(_compat); SYSCTL_DECL(_regression); SYSCTL_DECL(_security); SYSCTL_DECL(_security_bsd); +#ifdef EXT_RESOURCES +SYSCTL_DECL(_clock); +#endif extern char machine[]; extern char osrelease[]; diff --git a/freebsd/sys/sys/syslog.h b/freebsd/sys/sys/syslog.h index 61bad21c..f31cfb97 100644 --- a/freebsd/sys/sys/syslog.h +++ b/freebsd/sys/sys/syslog.h @@ -10,7 +10,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/sys/sysproto.h b/freebsd/sys/sys/sysproto.h index a1673035..bdd20804 100644 --- a/freebsd/sys/sys/sysproto.h +++ b/freebsd/sys/sys/sysproto.h @@ -3,7 +3,6 @@ * * DO NOT EDIT-- this file is automatically generated. * $FreeBSD$ - * created from FreeBSD: head/sys/kern/syscalls.master 310638 2016-12-27 20:21:11Z jhb */ #ifndef _SYS_SYSPROTO_H_ @@ -737,6 +736,12 @@ struct ffclock_setestimate_args { struct ffclock_getestimate_args { char cest_l_[PADL_(struct ffclock_estimate *)]; struct ffclock_estimate * cest; char cest_r_[PADR_(struct ffclock_estimate *)]; }; +struct clock_nanosleep_args { + char clock_id_l_[PADL_(clockid_t)]; clockid_t clock_id; char clock_id_r_[PADR_(clockid_t)]; + char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; + char rqtp_l_[PADL_(const struct timespec *)]; const struct timespec * rqtp; char rqtp_r_[PADR_(const struct timespec *)]; + char rmtp_l_[PADL_(struct timespec *)]; struct timespec * rmtp; char rmtp_r_[PADR_(struct timespec *)]; +}; struct clock_getcpuclockid2_args { char id_l_[PADL_(id_t)]; id_t id; char id_r_[PADR_(id_t)]; char which_l_[PADL_(int)]; int which; char which_r_[PADR_(int)]; @@ -1982,6 +1987,7 @@ int sys_nanosleep(struct thread *, struct nanosleep_args *); int sys_ffclock_getcounter(struct thread *, struct ffclock_getcounter_args *); int sys_ffclock_setestimate(struct thread *, struct ffclock_setestimate_args *); int sys_ffclock_getestimate(struct thread *, struct ffclock_getestimate_args *); +int sys_clock_nanosleep(struct thread *, struct clock_nanosleep_args *); int sys_clock_getcpuclockid2(struct thread *, struct clock_getcpuclockid2_args *); int sys_ntp_gettime(struct thread *, struct ntp_gettime_args *); int sys_minherit(struct thread *, struct minherit_args *); @@ -2705,7 +2711,7 @@ int 
freebsd10_pipe(struct thread *, struct freebsd10_pipe_args *); #define SYS_AUE_shmsys AUE_SHMSYS #define SYS_AUE_freebsd6_pread AUE_PREAD #define SYS_AUE_freebsd6_pwrite AUE_PWRITE -#define SYS_AUE_setfib AUE_NULL +#define SYS_AUE_setfib AUE_SETFIB #define SYS_AUE_ntp_adjtime AUE_NTP_ADJTIME #define SYS_AUE_setgid AUE_SETGID #define SYS_AUE_setegid AUE_SETEGID @@ -2752,15 +2758,16 @@ int freebsd10_pipe(struct thread *, struct freebsd10_pipe_args *); #define SYS_AUE_ffclock_getcounter AUE_NULL #define SYS_AUE_ffclock_setestimate AUE_NULL #define SYS_AUE_ffclock_getestimate AUE_NULL +#define SYS_AUE_clock_nanosleep AUE_NULL #define SYS_AUE_clock_getcpuclockid2 AUE_NULL #define SYS_AUE_ntp_gettime AUE_NULL #define SYS_AUE_minherit AUE_MINHERIT #define SYS_AUE_rfork AUE_RFORK #define SYS_AUE_issetugid AUE_ISSETUGID #define SYS_AUE_lchown AUE_LCHOWN -#define SYS_AUE_aio_read AUE_NULL -#define SYS_AUE_aio_write AUE_NULL -#define SYS_AUE_lio_listio AUE_NULL +#define SYS_AUE_aio_read AUE_AIO_READ +#define SYS_AUE_aio_write AUE_AIO_WRITE +#define SYS_AUE_lio_listio AUE_LIO_LISTIO #define SYS_AUE_getdents AUE_O_GETDENTS #define SYS_AUE_lchmod AUE_LCHMOD #define SYS_AUE_lutimes AUE_LUTIMES @@ -2785,13 +2792,13 @@ int freebsd10_pipe(struct thread *, struct freebsd10_pipe_args *); #define SYS_AUE_getsid AUE_GETSID #define SYS_AUE_setresuid AUE_SETRESUID #define SYS_AUE_setresgid AUE_SETRESGID -#define SYS_AUE_aio_return AUE_NULL -#define SYS_AUE_aio_suspend AUE_NULL -#define SYS_AUE_aio_cancel AUE_NULL -#define SYS_AUE_aio_error AUE_NULL -#define SYS_AUE_freebsd6_aio_read AUE_NULL -#define SYS_AUE_freebsd6_aio_write AUE_NULL -#define SYS_AUE_freebsd6_lio_listio AUE_NULL +#define SYS_AUE_aio_return AUE_AIO_RETURN +#define SYS_AUE_aio_suspend AUE_AIO_SUSPEND +#define SYS_AUE_aio_cancel AUE_AIO_CANCEL +#define SYS_AUE_aio_error AUE_AIO_ERROR +#define SYS_AUE_freebsd6_aio_read AUE_AIO_READ +#define SYS_AUE_freebsd6_aio_write AUE_AIO_WRITE +#define SYS_AUE_freebsd6_lio_listio 
AUE_LIO_LISTIO #define SYS_AUE_yield AUE_NULL #define SYS_AUE_mlockall AUE_MLOCKALL #define SYS_AUE_munlockall AUE_MUNLOCKALL @@ -2816,27 +2823,27 @@ int freebsd10_pipe(struct thread *, struct freebsd10_pipe_args *); #define SYS_AUE_freebsd4_sigreturn AUE_SIGRETURN #define SYS_AUE_sigtimedwait AUE_SIGWAIT #define SYS_AUE_sigwaitinfo AUE_NULL -#define SYS_AUE___acl_get_file AUE_NULL -#define SYS_AUE___acl_set_file AUE_NULL -#define SYS_AUE___acl_get_fd AUE_NULL -#define SYS_AUE___acl_set_fd AUE_NULL -#define SYS_AUE___acl_delete_file AUE_NULL -#define SYS_AUE___acl_delete_fd AUE_NULL -#define SYS_AUE___acl_aclcheck_file AUE_NULL -#define SYS_AUE___acl_aclcheck_fd AUE_NULL +#define SYS_AUE___acl_get_file AUE_ACL_GET_FILE +#define SYS_AUE___acl_set_file AUE_ACL_SET_FILE +#define SYS_AUE___acl_get_fd AUE_ACL_GET_FD +#define SYS_AUE___acl_set_fd AUE_ACL_SET_FD +#define SYS_AUE___acl_delete_file AUE_ACL_DELETE_FILE +#define SYS_AUE___acl_delete_fd AUE_ACL_DELETE_FD +#define SYS_AUE___acl_aclcheck_file AUE_ACL_CHECK_FILE +#define SYS_AUE___acl_aclcheck_fd AUE_ACL_CHECK_FD #define SYS_AUE_extattrctl AUE_EXTATTRCTL #define SYS_AUE_extattr_set_file AUE_EXTATTR_SET_FILE #define SYS_AUE_extattr_get_file AUE_EXTATTR_GET_FILE #define SYS_AUE_extattr_delete_file AUE_EXTATTR_DELETE_FILE -#define SYS_AUE_aio_waitcomplete AUE_NULL +#define SYS_AUE_aio_waitcomplete AUE_AIO_WAITCOMPLETE #define SYS_AUE_getresuid AUE_GETRESUID #define SYS_AUE_getresgid AUE_GETRESGID #define SYS_AUE_kqueue AUE_KQUEUE -#define SYS_AUE_kevent AUE_NULL +#define SYS_AUE_kevent AUE_KEVENT #define SYS_AUE_extattr_set_fd AUE_EXTATTR_SET_FD #define SYS_AUE_extattr_get_fd AUE_EXTATTR_GET_FD #define SYS_AUE_extattr_delete_fd AUE_EXTATTR_DELETE_FD -#define SYS_AUE___setugid AUE_NULL +#define SYS_AUE___setugid AUE_SETUGID #define SYS_AUE_eaccess AUE_EACCESS #define SYS_AUE_afs3_syscall AUE_NULL #define SYS_AUE_nmount AUE_NMOUNT @@ -2855,15 +2862,15 @@ int freebsd10_pipe(struct thread *, struct freebsd10_pipe_args 
*); #define SYS_AUE_statfs AUE_STATFS #define SYS_AUE_fstatfs AUE_FSTATFS #define SYS_AUE_fhstatfs AUE_FHSTATFS -#define SYS_AUE_ksem_close AUE_NULL -#define SYS_AUE_ksem_post AUE_NULL -#define SYS_AUE_ksem_wait AUE_NULL -#define SYS_AUE_ksem_trywait AUE_NULL -#define SYS_AUE_ksem_init AUE_NULL -#define SYS_AUE_ksem_open AUE_NULL -#define SYS_AUE_ksem_unlink AUE_NULL -#define SYS_AUE_ksem_getvalue AUE_NULL -#define SYS_AUE_ksem_destroy AUE_NULL +#define SYS_AUE_ksem_close AUE_SEMCLOSE +#define SYS_AUE_ksem_post AUE_SEMPOST +#define SYS_AUE_ksem_wait AUE_SEMWAIT +#define SYS_AUE_ksem_trywait AUE_SEMTRYWAIT +#define SYS_AUE_ksem_init AUE_SEMINIT +#define SYS_AUE_ksem_open AUE_SEMOPEN +#define SYS_AUE_ksem_unlink AUE_SEMUNLINK +#define SYS_AUE_ksem_getvalue AUE_SEMGETVALUE +#define SYS_AUE_ksem_destroy AUE_SEMDESTROY #define SYS_AUE___mac_get_pid AUE_NULL #define SYS_AUE___mac_get_link AUE_NULL #define SYS_AUE___mac_set_link AUE_NULL @@ -2877,20 +2884,20 @@ int freebsd10_pipe(struct thread *, struct freebsd10_pipe_args *); #define SYS_AUE_setcontext AUE_NULL #define SYS_AUE_swapcontext AUE_NULL #define SYS_AUE_swapoff AUE_SWAPOFF -#define SYS_AUE___acl_get_link AUE_NULL -#define SYS_AUE___acl_set_link AUE_NULL -#define SYS_AUE___acl_delete_link AUE_NULL -#define SYS_AUE___acl_aclcheck_link AUE_NULL +#define SYS_AUE___acl_get_link AUE_ACL_GET_LINK +#define SYS_AUE___acl_set_link AUE_ACL_SET_LINK +#define SYS_AUE___acl_delete_link AUE_ACL_DELETE_LINK +#define SYS_AUE___acl_aclcheck_link AUE_ACL_CHECK_LINK #define SYS_AUE_sigwait AUE_SIGWAIT -#define SYS_AUE_thr_create AUE_NULL -#define SYS_AUE_thr_exit AUE_NULL +#define SYS_AUE_thr_create AUE_THR_CREATE +#define SYS_AUE_thr_exit AUE_THR_EXIT #define SYS_AUE_thr_self AUE_NULL -#define SYS_AUE_thr_kill AUE_NULL -#define SYS_AUE_jail_attach AUE_NULL +#define SYS_AUE_thr_kill AUE_THR_KILL +#define SYS_AUE_jail_attach AUE_JAIL_ATTACH #define SYS_AUE_extattr_list_fd AUE_EXTATTR_LIST_FD #define SYS_AUE_extattr_list_file 
AUE_EXTATTR_LIST_FILE #define SYS_AUE_extattr_list_link AUE_EXTATTR_LIST_LINK -#define SYS_AUE_ksem_timedwait AUE_NULL +#define SYS_AUE_ksem_timedwait AUE_SEMWAIT #define SYS_AUE_thr_suspend AUE_NULL #define SYS_AUE_thr_wake AUE_NULL #define SYS_AUE_kldunloadf AUE_MODUNLOAD @@ -2904,29 +2911,29 @@ int freebsd10_pipe(struct thread *, struct freebsd10_pipe_args *); #define SYS_AUE_setaudit_addr AUE_SETAUDIT_ADDR #define SYS_AUE_auditctl AUE_AUDITCTL #define SYS_AUE__umtx_op AUE_NULL -#define SYS_AUE_thr_new AUE_NULL +#define SYS_AUE_thr_new AUE_THR_NEW #define SYS_AUE_sigqueue AUE_NULL -#define SYS_AUE_kmq_open AUE_NULL -#define SYS_AUE_kmq_setattr AUE_NULL -#define SYS_AUE_kmq_timedreceive AUE_NULL -#define SYS_AUE_kmq_timedsend AUE_NULL -#define SYS_AUE_kmq_notify AUE_NULL -#define SYS_AUE_kmq_unlink AUE_NULL +#define SYS_AUE_kmq_open AUE_MQ_OPEN +#define SYS_AUE_kmq_setattr AUE_MQ_SETATTR +#define SYS_AUE_kmq_timedreceive AUE_MQ_TIMEDRECEIVE +#define SYS_AUE_kmq_timedsend AUE_MQ_TIMEDSEND +#define SYS_AUE_kmq_notify AUE_MQ_NOTIFY +#define SYS_AUE_kmq_unlink AUE_MQ_UNLINK #define SYS_AUE_abort2 AUE_NULL #define SYS_AUE_thr_set_name AUE_NULL -#define SYS_AUE_aio_fsync AUE_NULL +#define SYS_AUE_aio_fsync AUE_AIO_FSYNC #define SYS_AUE_rtprio_thread AUE_RTPRIO -#define SYS_AUE_sctp_peeloff AUE_NULL -#define SYS_AUE_sctp_generic_sendmsg AUE_NULL -#define SYS_AUE_sctp_generic_sendmsg_iov AUE_NULL -#define SYS_AUE_sctp_generic_recvmsg AUE_NULL +#define SYS_AUE_sctp_peeloff AUE_SCTP_PEELOFF +#define SYS_AUE_sctp_generic_sendmsg AUE_SCTP_GENERIC_SENDMSG +#define SYS_AUE_sctp_generic_sendmsg_iov AUE_SCTP_GENERIC_SENDMSG_IOV +#define SYS_AUE_sctp_generic_recvmsg AUE_SCTP_GENERIC_RECVMSG #define SYS_AUE_pread AUE_PREAD #define SYS_AUE_pwrite AUE_PWRITE #define SYS_AUE_mmap AUE_MMAP #define SYS_AUE_lseek AUE_LSEEK #define SYS_AUE_truncate AUE_TRUNCATE #define SYS_AUE_ftruncate AUE_FTRUNCATE -#define SYS_AUE_thr_kill2 AUE_KILL +#define SYS_AUE_thr_kill2 AUE_THR_KILL2 #define 
SYS_AUE_shm_open AUE_SHMOPEN #define SYS_AUE_shm_unlink AUE_SHMUNLINK #define SYS_AUE_cpuset AUE_NULL @@ -2951,9 +2958,9 @@ int freebsd10_pipe(struct thread *, struct freebsd10_pipe_args *); #define SYS_AUE_unlinkat AUE_UNLINKAT #define SYS_AUE_posix_openpt AUE_POSIX_OPENPT #define SYS_AUE_gssd_syscall AUE_NULL -#define SYS_AUE_jail_get AUE_NULL -#define SYS_AUE_jail_set AUE_NULL -#define SYS_AUE_jail_remove AUE_NULL +#define SYS_AUE_jail_get AUE_JAIL_GET +#define SYS_AUE_jail_set AUE_JAIL_SET +#define SYS_AUE_jail_remove AUE_JAIL_REMOVE #define SYS_AUE_closefrom AUE_CLOSEFROM #define SYS_AUE___semctl AUE_SEMCTL #define SYS_AUE_msgctl AUE_MSGCTL @@ -2966,15 +2973,15 @@ int freebsd10_pipe(struct thread *, struct freebsd10_pipe_args *); #define SYS_AUE_pdkill AUE_PDKILL #define SYS_AUE_pdgetpid AUE_PDGETPID #define SYS_AUE_pselect AUE_SELECT -#define SYS_AUE_getloginclass AUE_NULL -#define SYS_AUE_setloginclass AUE_NULL +#define SYS_AUE_getloginclass AUE_GETLOGINCLASS +#define SYS_AUE_setloginclass AUE_SETLOGINCLASS #define SYS_AUE_rctl_get_racct AUE_NULL #define SYS_AUE_rctl_get_rules AUE_NULL #define SYS_AUE_rctl_get_limits AUE_NULL #define SYS_AUE_rctl_add_rule AUE_NULL #define SYS_AUE_rctl_remove_rule AUE_NULL -#define SYS_AUE_posix_fallocate AUE_NULL -#define SYS_AUE_posix_fadvise AUE_NULL +#define SYS_AUE_posix_fallocate AUE_POSIX_FALLOCATE +#define SYS_AUE_posix_fadvise AUE_POSIX_FADVISE #define SYS_AUE_wait6 AUE_WAIT6 #define SYS_AUE_cap_rights_limit AUE_CAP_RIGHTS_LIMIT #define SYS_AUE_cap_ioctls_limit AUE_CAP_IOCTLS_LIMIT @@ -2986,8 +2993,8 @@ int freebsd10_pipe(struct thread *, struct freebsd10_pipe_args *); #define SYS_AUE_chflagsat AUE_CHFLAGSAT #define SYS_AUE_accept4 AUE_ACCEPT #define SYS_AUE_pipe2 AUE_PIPE -#define SYS_AUE_aio_mlock AUE_NULL -#define SYS_AUE_procctl AUE_NULL +#define SYS_AUE_aio_mlock AUE_AIO_MLOCK +#define SYS_AUE_procctl AUE_PROCCTL #define SYS_AUE_ppoll AUE_POLL #define SYS_AUE_futimens AUE_FUTIMES #define SYS_AUE_utimensat 
AUE_FUTIMESAT diff --git a/freebsd/sys/sys/systm.h b/freebsd/sys/sys/systm.h index 3512c49e..ee799320 100644 --- a/freebsd/sys/sys/systm.h +++ b/freebsd/sys/sys/systm.h @@ -15,7 +15,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * @@ -45,6 +45,8 @@ #include #include /* for people using printf mainly */ +__NULLABILITY_PRAGMA_PUSH + #ifndef __rtems__ extern int cold; /* nonzero if we are doing a cold boot */ extern int suspend_blocked; /* block suspend due to pending shutdown */ @@ -150,11 +152,21 @@ void kassert_panic(const char *fmt, ...) __printflike(1, 2); * going to run the thread that holds any lock we need. */ #ifndef __rtems__ -#define SCHEDULER_STOPPED() __predict_false(curthread->td_stopsched) +#define SCHEDULER_STOPPED_TD(td) ({ \ + MPASS((td) == curthread); \ + __predict_false((td)->td_stopsched); \ +}) #else /* __rtems__ */ -#define SCHEDULER_STOPPED() 0 +#define SCHEDULER_STOPPED_TD(td) 0 #endif /* __rtems__ */ +#define SCHEDULER_STOPPED() SCHEDULER_STOPPED_TD(curthread) +/* + * Align variables. 
+ */ +#define __read_mostly __section(".data.read_mostly") +#define __exclusive_cache_line __aligned(CACHE_LINE_SIZE) \ + __section(".data.exclusive_cache_line") /* * XXX the hints declarations are even more misplaced than most declarations * in this file, since they are needed in one file (per arch) and only used @@ -272,25 +284,25 @@ int vsnprintf(char *, size_t, const char *, __va_list) __printflike(3, 0); int vsnrprintf(char *, size_t, int, const char *, __va_list) __printflike(4, 0); int vsprintf(char *buf, const char *, __va_list) __printflike(2, 0); int ttyprintf(struct tty *, const char *, ...) __printflike(2, 3); -int sscanf(const char *, char const *, ...) __nonnull(1) __nonnull(2) __scanflike(2, 3); -int vsscanf(const char *, char const *, __va_list) __nonnull(1) __nonnull(2) __scanflike(2, 0); -long strtol(const char *, char **, int) __nonnull(1); -u_long strtoul(const char *, char **, int) __nonnull(1); +int sscanf(const char *, char const * _Nonnull, ...) __scanflike(2, 3); +int vsscanf(const char * _Nonnull, char const * _Nonnull, __va_list) __scanflike(2, 0); +long strtol(const char *, char **, int); +u_long strtoul(const char *, char **, int); #ifndef __rtems__ -quad_t strtoq(const char *, char **, int) __nonnull(1); -u_quad_t strtouq(const char *, char **, int) __nonnull(1); +quad_t strtoq(const char *, char **, int); +u_quad_t strtouq(const char *, char **, int); #else /* __rtems__ */ long long strtoll(const char *, char **, int); unsigned long long strtoull(const char *, char **, int); -static inline quad_t __nonnull(1) +static inline quad_t strtoq(const char *nptr, char **endptr, int base) { return (strtoll(nptr, endptr, base)); } -static inline u_quad_t __nonnull(1) +static inline u_quad_t strtouq(const char *nptr, char **endptr, int base) { @@ -308,34 +320,34 @@ void hexdump(const void *ptr, int length, const char *hdr, int flags); #define ovbcopy(f, t, l) bcopy((f), (t), (l)) #ifndef __rtems__ -void bcopy(const void *from, void *to, size_t 
len) __nonnull(1) __nonnull(2); -void bzero(void *buf, size_t len) __nonnull(1); +void bcopy(const void * _Nonnull from, void * _Nonnull to, size_t len); +void bzero(void * _Nonnull buf, size_t len); #else /* __rtems__ */ -#define bcopy(src, dst, len) memmove((dst), (src), (len)) -#define bzero(buf, size) memset((buf), 0, (size)) +#define bcopy(src, dst, len) memmove((dst), (src), (len)) +#define bzero(buf, size) memset((buf), 0, (size)) #endif /* __rtems__ */ -void explicit_bzero(void *, size_t) __nonnull(1); +void explicit_bzero(void * _Nonnull, size_t); -void *memcpy(void *to, const void *from, size_t len) __nonnull(1) __nonnull(2); -void *memmove(void *dest, const void *src, size_t n) __nonnull(1) __nonnull(2); +void *memcpy(void * _Nonnull to, const void * _Nonnull from, size_t len); +void *memmove(void * _Nonnull dest, const void * _Nonnull src, size_t n); -int copystr(const void * __restrict kfaddr, void * __restrict kdaddr, - size_t len, size_t * __restrict lencopied) - __nonnull(1) __nonnull(2); #ifndef __rtems__ -int copyinstr(const void * __restrict udaddr, void * __restrict kaddr, - size_t len, size_t * __restrict lencopied) - __nonnull(1) __nonnull(2); -int copyin(const void * __restrict udaddr, void * __restrict kaddr, - size_t len) __nonnull(1) __nonnull(2); -int copyin_nofault(const void * __restrict udaddr, void * __restrict kaddr, - size_t len) __nonnull(1) __nonnull(2); -int copyout(const void * __restrict kaddr, void * __restrict udaddr, - size_t len) __nonnull(1) __nonnull(2); -int copyout_nofault(const void * __restrict kaddr, void * __restrict udaddr, - size_t len) __nonnull(1) __nonnull(2); +int copystr(const void * _Nonnull __restrict kfaddr, + void * _Nonnull __restrict kdaddr, size_t len, + size_t * __restrict lencopied); +int copyinstr(const void * __restrict udaddr, + void * _Nonnull __restrict kaddr, size_t len, + size_t * __restrict lencopied); +int copyin(const void * _Nonnull __restrict udaddr, + void * _Nonnull __restrict kaddr, 
size_t len); +int copyin_nofault(const void * _Nonnull __restrict udaddr, + void * _Nonnull __restrict kaddr, size_t len); +int copyout(const void * _Nonnull __restrict kaddr, + void * _Nonnull __restrict udaddr, size_t len); +int copyout_nofault(const void * _Nonnull __restrict kaddr, + void * _Nonnull __restrict udaddr, size_t len); #else /* __rtems__ */ -static inline int __nonnull(1) __nonnull(2) +static inline int copyinstr(const void * __restrict udaddr, void * __restrict kaddr, size_t len, size_t * __restrict lencopied) { @@ -348,7 +360,7 @@ copyinstr(const void * __restrict udaddr, void * __restrict kaddr, return (0); } -static inline int __nonnull(1) __nonnull(2) +static inline int copyin(const void * __restrict udaddr, void * __restrict kaddr, size_t len) { @@ -357,14 +369,14 @@ copyin(const void * __restrict udaddr, void * __restrict kaddr, return (0); } -static inline int __nonnull(1) __nonnull(2) +static inline int copyin_nofault(const void * __restrict udaddr, void * __restrict kaddr, size_t len) { return copyin(udaddr, kaddr, len); } -static inline int __nonnull(1) __nonnull(2) +static inline int copyout(const void * __restrict kaddr, void * __restrict udaddr, size_t len) { @@ -373,7 +385,7 @@ copyout(const void * __restrict kaddr, void * __restrict udaddr, return (0); } -static inline int __nonnull(1) __nonnull(2) +static inline int copyout_nofault(const void * __restrict kaddr, void * __restrict udaddr, size_t len) { @@ -437,7 +449,6 @@ sbintime_t cpu_idleclock(void); void cpu_activeclock(void); void cpu_new_callout(int cpu, sbintime_t bt, sbintime_t bt_opt); void cpu_et_frequency(struct eventtimer *et, uint64_t newfreq); -extern int cpu_deepest_sleep; extern int cpu_disable_c2_sleep; extern int cpu_disable_c3_sleep; @@ -505,8 +516,8 @@ static __inline void splx(intrmask_t ipl __unused) { return; } * Common `proc' functions are declared here so that proc.h can be included * less often. 
*/ -int _sleep(void *chan, struct lock_object *lock, int pri, const char *wmesg, - sbintime_t sbt, sbintime_t pr, int flags) __nonnull(1); +int _sleep(void * _Nonnull chan, struct lock_object *lock, int pri, + const char *wmesg, sbintime_t sbt, sbintime_t pr, int flags); #define msleep(chan, mtx, pri, wmesg, timo) \ _sleep((chan), &(mtx)->lock_object, (pri), (wmesg), \ tick_sbt * (timo), 0, C_HARDCLOCK) @@ -514,8 +525,8 @@ int _sleep(void *chan, struct lock_object *lock, int pri, const char *wmesg, _sleep((chan), &(mtx)->lock_object, (pri), (wmesg), (bt), (pr), \ (flags)) #ifndef __rtems__ -int msleep_spin_sbt(void *chan, struct mtx *mtx, const char *wmesg, - sbintime_t sbt, sbintime_t pr, int flags) __nonnull(1); +int msleep_spin_sbt(void * _Nonnull chan, struct mtx *mtx, + const char *wmesg, sbintime_t sbt, sbintime_t pr, int flags); #else /* __rtems__ */ #define msleep_spin_sbt(chan, mtx, wmesg, sbt, pr, flags) \ msleep_sbt(chan, mtx, 0, wmesg, sbt, pr, flags) @@ -535,8 +546,8 @@ int pause_sbt(const char *wmesg, sbintime_t sbt, sbintime_t pr, 0, C_HARDCLOCK) #define tsleep_sbt(chan, pri, wmesg, bt, pr, flags) \ _sleep((chan), NULL, (pri), (wmesg), (bt), (pr), (flags)) -void wakeup(void *chan) __nonnull(1); -void wakeup_one(void *chan) __nonnull(1); +void wakeup(void * chan); +void wakeup_one(void * chan); /* * Common `struct cdev *' stuff are declared here to avoid #include poisoning @@ -580,8 +591,8 @@ void free_unr(struct unrhdr *uh, u_int item); void intr_prof_stack_use(struct thread *td, struct trapframe *frame); -extern void (*softdep_ast_cleanup)(void); - void counted_warning(unsigned *counter, const char *msg); +__NULLABILITY_PRAGMA_POP + #endif /* !_SYS_SYSTM_H_ */ diff --git a/freebsd/sys/sys/taskqueue.h b/freebsd/sys/sys/taskqueue.h index a6c66558..583f796e 100644 --- a/freebsd/sys/sys/taskqueue.h +++ b/freebsd/sys/sys/taskqueue.h @@ -30,7 +30,7 @@ #define _SYS_TASKQUEUE_H_ #ifndef _KERNEL -#error "no user-servicable parts inside" +#error "no 
user-serviceable parts inside" #endif #include @@ -79,6 +79,7 @@ int taskqueue_start_threads_cpuset(struct taskqueue **tqp, int count, int taskqueue_enqueue(struct taskqueue *queue, struct task *task); int taskqueue_enqueue_timeout(struct taskqueue *queue, struct timeout_task *timeout_task, int ticks); +int taskqueue_poll_is_busy(struct taskqueue *queue, struct task *task); int taskqueue_cancel(struct taskqueue *queue, struct task *task, u_int *pendp); int taskqueue_cancel_timeout(struct taskqueue *queue, diff --git a/freebsd/sys/sys/tty.h b/freebsd/sys/sys/tty.h index 4d082667..a82aedf4 100644 --- a/freebsd/sys/sys/tty.h +++ b/freebsd/sys/sys/tty.h @@ -62,6 +62,7 @@ struct tty { struct mtx *t_mtx; /* TTY lock. */ struct mtx t_mtxobj; /* Per-TTY lock (when not borrowing). */ TAILQ_ENTRY(tty) t_list; /* (l) TTY list entry. */ + int t_drainwait; /* (t) TIOCDRAIN timeout seconds. */ unsigned int t_flags; /* (t) Terminal option flags. */ /* Keep flags in sync with db_show_tty and pstat(8). */ #define TF_NOPREFIX 0x00001 /* Don't prepend "tty" to device name. */ diff --git a/freebsd/sys/sys/ttyqueue.h b/freebsd/sys/sys/ttyqueue.h index 2d1a565a..c8d85d62 100644 --- a/freebsd/sys/sys/ttyqueue.h +++ b/freebsd/sys/sys/ttyqueue.h @@ -69,7 +69,7 @@ struct ttyoutq { #ifdef _KERNEL /* Input queue handling routines. */ -void ttyinq_setsize(struct ttyinq *ti, struct tty *tp, size_t len); +int ttyinq_setsize(struct ttyinq *ti, struct tty *tp, size_t len); void ttyinq_free(struct ttyinq *ti); int ttyinq_read_uio(struct ttyinq *ti, struct tty *tp, struct uio *uio, size_t readlen, size_t flushlen); @@ -136,7 +136,7 @@ void ttyinq_line_iterate_from_reprintpos(struct ttyinq *ti, /* Output queue handling routines. 
*/ void ttyoutq_flush(struct ttyoutq *to); -void ttyoutq_setsize(struct ttyoutq *to, struct tty *tp, size_t len); +int ttyoutq_setsize(struct ttyoutq *to, struct tty *tp, size_t len); void ttyoutq_free(struct ttyoutq *to); size_t ttyoutq_read(struct ttyoutq *to, void *buf, size_t len); int ttyoutq_read_uio(struct ttyoutq *to, struct tty *tp, struct uio *uio); diff --git a/freebsd/sys/sys/ucred.h b/freebsd/sys/sys/ucred.h index ae3fcdeb..bf5d99d8 100644 --- a/freebsd/sys/sys/ucred.h +++ b/freebsd/sys/sys/ucred.h @@ -10,7 +10,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/sys/un.h b/freebsd/sys/sys/un.h index 7837e76e..27d6a499 100644 --- a/freebsd/sys/sys/un.h +++ b/freebsd/sys/sys/un.h @@ -10,7 +10,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/sys/unpcb.h b/freebsd/sys/sys/unpcb.h index cdb5c4d0..619b68dd 100644 --- a/freebsd/sys/sys/unpcb.h +++ b/freebsd/sys/sys/unpcb.h @@ -10,7 +10,7 @@ * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/sys/user.h b/freebsd/sys/sys/user.h index 9dad4d0a..762841ee 100644 --- a/freebsd/sys/sys/user.h +++ b/freebsd/sys/sys/user.h @@ -12,7 +12,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/sys/vmmeter.h b/freebsd/sys/sys/vmmeter.h index 31312b0a..b5d0ef14 100644 --- a/freebsd/sys/sys/vmmeter.h +++ b/freebsd/sys/sys/vmmeter.h @@ -10,7 +10,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/sys/vnode.h b/freebsd/sys/sys/vnode.h index e125b0de..0d8c6521 100644 --- a/freebsd/sys/sys/vnode.h +++ b/freebsd/sys/sys/vnode.h @@ -10,7 +10,7 @@ * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * diff --git a/freebsd/sys/vm/uma.h b/freebsd/sys/vm/uma.h index 1ab51c89..605ba9b1 100644 --- a/freebsd/sys/vm/uma.h +++ b/freebsd/sys/vm/uma.h @@ -242,7 +242,7 @@ uma_zone_t uma_zcache_create(char *name, int size, uma_ctor ctor, uma_dtor dtor, * Definitions for uma_zcreate flags * * These flags share space with UMA_ZFLAGs in uma_int.h. Be careful not to - * overlap when adding new features. 0xf0000000 is in use by uma_int.h. + * overlap when adding new features. 0xff000000 is in use by uma_int.h. */ #define UMA_ZONE_PAGEABLE 0x0001 /* Return items not fully backed by physical memory XXX Not yet */ diff --git a/freebsd/sys/vm/uma_core.c b/freebsd/sys/vm/uma_core.c index 357895c0..58fd5336 100644 --- a/freebsd/sys/vm/uma_core.c +++ b/freebsd/sys/vm/uma_core.c @@ -66,6 +66,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -1270,15 +1271,16 @@ keg_small_init(uma_keg_t keg) u_int memused; u_int wastedspace; u_int shsize; + u_int slabsize; if (keg->uk_flags & UMA_ZONE_PCPU) { u_int ncpus = (mp_maxid + 1) ? (mp_maxid + 1) : MAXCPU; - keg->uk_slabsize = sizeof(struct pcpu); + slabsize = sizeof(struct pcpu); keg->uk_ppera = howmany(ncpus * sizeof(struct pcpu), PAGE_SIZE); } else { - keg->uk_slabsize = UMA_SLAB_SIZE; + slabsize = UMA_SLAB_SIZE; keg->uk_ppera = 1; } @@ -1288,8 +1290,8 @@ keg_small_init(uma_keg_t keg) * allocation bits for we round it up. 
*/ rsize = keg->uk_size; - if (rsize < keg->uk_slabsize / SLAB_SETSIZE) - rsize = keg->uk_slabsize / SLAB_SETSIZE; + if (rsize < slabsize / SLAB_SETSIZE) + rsize = slabsize / SLAB_SETSIZE; if (rsize & keg->uk_align) rsize = (rsize & ~keg->uk_align) + (keg->uk_align + 1); keg->uk_rsize = rsize; @@ -1303,12 +1305,12 @@ keg_small_init(uma_keg_t keg) else shsize = sizeof(struct uma_slab); - keg->uk_ipers = (keg->uk_slabsize - shsize) / rsize; + keg->uk_ipers = (slabsize - shsize) / rsize; KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= SLAB_SETSIZE, ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers)); memused = keg->uk_ipers * rsize + shsize; - wastedspace = keg->uk_slabsize - memused; + wastedspace = slabsize - memused; /* * We can't do OFFPAGE if we're internal or if we've been @@ -1329,9 +1331,9 @@ keg_small_init(uma_keg_t keg) * Historically this was not done because the VM could not * efficiently handle contiguous allocations. */ - if ((wastedspace >= keg->uk_slabsize / UMA_MAX_WASTE) && - (keg->uk_ipers < (keg->uk_slabsize / keg->uk_rsize))) { - keg->uk_ipers = keg->uk_slabsize / keg->uk_rsize; + if ((wastedspace >= slabsize / UMA_MAX_WASTE) && + (keg->uk_ipers < (slabsize / keg->uk_rsize))) { + keg->uk_ipers = slabsize / keg->uk_rsize; KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= SLAB_SETSIZE, ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers)); #ifdef UMA_DEBUG @@ -1340,8 +1342,8 @@ keg_small_init(uma_keg_t keg) "maximum wasted space allowed = %d, " "calculated ipers = %d, " "new wasted space = %d\n", keg->uk_name, wastedspace, - keg->uk_slabsize / UMA_MAX_WASTE, keg->uk_ipers, - keg->uk_slabsize - keg->uk_ipers * keg->uk_rsize); + slabsize / UMA_MAX_WASTE, keg->uk_ipers, + slabsize - keg->uk_ipers * keg->uk_rsize); #endif keg->uk_flags |= UMA_ZONE_OFFPAGE; } @@ -1374,7 +1376,6 @@ keg_large_init(uma_keg_t keg) ("%s: Cannot large-init a UMA_ZONE_PCPU keg", __func__)); keg->uk_ppera = howmany(keg->uk_size, PAGE_SIZE); - keg->uk_slabsize = keg->uk_ppera * PAGE_SIZE; 
keg->uk_ipers = 1; keg->uk_rsize = keg->uk_size; @@ -1426,7 +1427,6 @@ keg_cachespread_init(uma_keg_t keg) pages = MIN(pages, (128 * 1024) / PAGE_SIZE); keg->uk_rsize = rsize; keg->uk_ppera = pages; - keg->uk_slabsize = UMA_SLAB_SIZE; keg->uk_ipers = ((pages * PAGE_SIZE) + trailer) / rsize; keg->uk_flags |= UMA_ZONE_OFFPAGE | UMA_ZONE_VTOSLAB; KASSERT(keg->uk_ipers <= SLAB_SETSIZE, @@ -1566,7 +1566,8 @@ keg_ctor(void *mem, int size, void *udata, int flags) printf("UMA: %s(%p) size %d(%d) flags %#x ipers %d ppera %d out %d free %d\n", zone->uz_name, zone, keg->uk_size, keg->uk_rsize, keg->uk_flags, keg->uk_ipers, keg->uk_ppera, - (keg->uk_ipers * keg->uk_pages) - keg->uk_free, keg->uk_free); + (keg->uk_pages / keg->uk_ppera) * keg->uk_ipers - keg->uk_free, + keg->uk_free); #endif LIST_INSERT_HEAD(&keg->uk_zones, zone, uz_link); @@ -3018,7 +3019,7 @@ uma_zone_set_max(uma_zone_t zone, int nitems) keg->uk_maxpages = (nitems / keg->uk_ipers) * keg->uk_ppera; if (keg->uk_maxpages * keg->uk_ipers < nitems) keg->uk_maxpages += keg->uk_ppera; - nitems = keg->uk_maxpages * keg->uk_ipers; + nitems = (keg->uk_maxpages / keg->uk_ppera) * keg->uk_ipers; KEG_UNLOCK(keg); return (nitems); @@ -3035,7 +3036,7 @@ uma_zone_get_max(uma_zone_t zone) if (keg == NULL) return (0); KEG_LOCK(keg); - nitems = keg->uk_maxpages * keg->uk_ipers; + nitems = (keg->uk_maxpages / keg->uk_ppera) * keg->uk_ipers; KEG_UNLOCK(keg); return (nitems); @@ -3197,13 +3198,14 @@ uma_zone_reserve_kva(uma_zone_t zone, int count) if (pages * keg->uk_ipers < count) pages++; + pages *= keg->uk_ppera; #ifdef UMA_MD_SMALL_ALLOC if (keg->uk_ppera > 1) { #else if (1) { #endif - kva = kva_alloc((vm_size_t)pages * UMA_SLAB_SIZE); + kva = kva_alloc((vm_size_t)pages * PAGE_SIZE); if (kva == 0) return (0); } else @@ -3305,6 +3307,11 @@ uma_reclaim_worker(void *arg __unused) "umarcl", 0); if (uma_reclaim_needed) { uma_reclaim_needed = 0; +#ifndef __rtems__ + sx_xunlock(&uma_drain_lock); + EVENTHANDLER_INVOKE(vm_lowmem, 
VM_LOW_KMEM); + sx_xlock(&uma_drain_lock); +#endif /* __rtems__ */ uma_reclaim_locked(true); } } @@ -3405,8 +3412,8 @@ uma_print_keg(uma_keg_t keg) "out %d free %d limit %d\n", keg->uk_name, keg, keg->uk_size, keg->uk_rsize, keg->uk_flags, keg->uk_ipers, keg->uk_ppera, - (keg->uk_ipers * keg->uk_pages) - keg->uk_free, keg->uk_free, - (keg->uk_maxpages / keg->uk_ppera) * keg->uk_ipers); + (keg->uk_pages / keg->uk_ppera) * keg->uk_ipers - keg->uk_free, + keg->uk_free, (keg->uk_maxpages / keg->uk_ppera) * keg->uk_ipers); printf("Part slabs:\n"); LIST_FOREACH(slab, &keg->uk_part_slab, us_link) slab_print(slab); diff --git a/freebsd/sys/vm/uma_int.h b/freebsd/sys/vm/uma_int.h index 679e2518..35656801 100644 --- a/freebsd/sys/vm/uma_int.h +++ b/freebsd/sys/vm/uma_int.h @@ -210,7 +210,6 @@ struct uma_keg { vm_offset_t uk_kva; /* Zone base KVA */ uma_zone_t uk_slabzone; /* Slab zone backing us, if OFFPAGE */ - uint16_t uk_slabsize; /* Slab size for this keg */ uint16_t uk_pgoff; /* Offset to uma_slab struct */ uint16_t uk_ppera; /* pages per allocation from backend */ uint16_t uk_ipers; /* Items per slab */ diff --git a/freebsd/sys/vm/vm.h b/freebsd/sys/vm/vm.h index 1df51fed..a41bc0b1 100644 --- a/freebsd/sys/vm/vm.h +++ b/freebsd/sys/vm/vm.h @@ -10,7 +10,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* @@ -68,6 +68,7 @@ typedef char vm_inherit_t; /* inheritance codes */ #define VM_INHERIT_SHARE ((vm_inherit_t) 0) #define VM_INHERIT_COPY ((vm_inherit_t) 1) #define VM_INHERIT_NONE ((vm_inherit_t) 2) +#define VM_INHERIT_ZERO ((vm_inherit_t) 3) #define VM_INHERIT_DEFAULT VM_INHERIT_COPY typedef u_char vm_prot_t; /* protection codes */ diff --git a/freebsd/sys/vm/vm_extern.h b/freebsd/sys/vm/vm_extern.h index dcb2f3a6..63248e64 100644 --- a/freebsd/sys/vm/vm_extern.h +++ b/freebsd/sys/vm/vm_extern.h @@ -10,7 +10,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * @@ -136,6 +136,5 @@ struct sf_buf *vm_imgact_map_page(vm_object_t object, vm_ooffset_t offset); void vm_imgact_unmap_page(struct sf_buf *sf); void vm_thread_dispose(struct thread *td); int vm_thread_new(struct thread *td, int pages); -int vm_mlock(struct proc *, struct ucred *, const void *, size_t); #endif /* _KERNEL */ #endif /* !_VM_EXTERN_H_ */ diff --git a/freebsd/sys/x86/pci/pci_bus.c b/freebsd/sys/x86/pci/pci_bus.c index 1b43f53f..3ef87cf1 100644 --- a/freebsd/sys/x86/pci/pci_bus.c +++ b/freebsd/sys/x86/pci/pci_bus.c @@ -197,7 +197,7 @@ legacy_pcib_is_host_bridge(int bus, int slot, int func, * For the 450nx chipset, there is a whole bundle of * things pretending to be host bridges. The MIOC will * be seen first and isn't really a pci bridge (the - * actual busses are attached to the PXB's). We need to + * actual buses are attached to the PXB's). We need to * read the registers of the MIOC to figure out the * bus numbers for the PXB channels. 
* @@ -568,7 +568,7 @@ legacy_pcib_write_ivar(device_t dev, device_t child, int which, * * If no memory preference is given, use upper 32MB slot most BIOSes * use for their memory window. This is typically only used on older - * laptops that don't have PCI busses behind a PCI bridge, so assuming + * laptops that don't have PCI buses behind a PCI bridge, so assuming * > 32MB is likely OK. * * However, this can cause problems for other chipsets, so we make @@ -665,6 +665,7 @@ static device_method_t legacy_pcib_methods[] = { DEVMETHOD(pcib_alloc_msix, legacy_pcib_alloc_msix), DEVMETHOD(pcib_release_msix, pcib_release_msix), DEVMETHOD(pcib_map_msi, legacy_pcib_map_msi), + DEVMETHOD(pcib_request_feature, pcib_request_feature_allow), DEVMETHOD_END }; @@ -727,7 +728,7 @@ DRIVER_MODULE(pcibus_pnp, isa, pcibus_pnp_driver, pcibus_pnp_devclass, 0, 0); #ifdef __HAVE_PIR /* - * Provide a PCI-PCI bridge driver for PCI busses behind PCI-PCI bridges + * Provide a PCI-PCI bridge driver for PCI buses behind PCI-PCI bridges * that appear in the PCIBIOS Interrupt Routing Table to use the routing * table for interrupt routing when possible. */ -- cgit v1.2.3